diff --git a/airbyte-integrations/connectors/destination-amazon-sqs/.dockerignore b/airbyte-integrations/connectors/destination-amazon-sqs/.dockerignore new file mode 100644 index 0000000..efa69d4 --- /dev/null +++ b/airbyte-integrations/connectors/destination-amazon-sqs/.dockerignore @@ -0,0 +1,5 @@ +* +!Dockerfile +!main.py +!destination_amazon_sqs +!setup.py diff --git a/airbyte-integrations/connectors/destination-amazon-sqs/Dockerfile b/airbyte-integrations/connectors/destination-amazon-sqs/Dockerfile new file mode 100644 index 0000000..9861de2 --- /dev/null +++ b/airbyte-integrations/connectors/destination-amazon-sqs/Dockerfile @@ -0,0 +1,38 @@ +FROM python:3.9.11-alpine3.15 as base + +# build and load all requirements +FROM base as builder +WORKDIR /airbyte/integration_code + +# upgrade pip to the latest version +RUN apk --no-cache upgrade \ + && pip install --upgrade pip \ + && apk --no-cache add tzdata build-base + + +COPY setup.py ./ +# install necessary packages to a temporary folder +RUN pip install --prefix=/install . + +# build a clean environment +FROM base +WORKDIR /airbyte/integration_code + +# copy all loaded and built libraries to a pure basic image +COPY --from=builder /install /usr/local +# add default timezone settings +COPY --from=builder /usr/share/zoneinfo/Etc/UTC /etc/localtime +RUN echo "Etc/UTC" > /etc/timezone + +# bash is installed for more convenient debugging. +RUN apk --no-cache add bash + +# copy payload code only +COPY main.py ./ +COPY destination_amazon_sqs ./destination_amazon_sqs + +ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" +ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] + +LABEL io.airbyte.version=0.1.1 +LABEL io.airbyte.name=airbyte/destination-amazon-sqs diff --git a/airbyte-integrations/connectors/destination-amazon-sqs/README.md b/airbyte-integrations/connectors/destination-amazon-sqs/README.md new file mode 100644 index 0000000..2856f60 --- /dev/null +++ b/airbyte-integrations/connectors/destination-amazon-sqs/README.md @@ -0,0 +1,99 @@ +# Amazon Sqs Destination + +This is the repository for the Amazon Sqs destination connector, written in Python. +For information about how to use this connector within Airbyte, see [the documentation](https://docs.airbyte.io/integrations/destinations/amazon-sqs). + +## Local development + +### Prerequisites +**To iterate on this connector, make sure to complete this prerequisites section.** + +#### Minimum Python version required `= 3.7.0` + +#### Build & Activate Virtual Environment and install dependencies +From this connector directory, create a virtual environment: +``` +python -m venv .venv +``` + +This will generate a virtualenv for this module in `.venv/`. Make sure this venv is active in your +development environment of choice. To activate it from the terminal, run: +``` +source .venv/bin/activate +pip install -r requirements.txt +``` +If you are in an IDE, follow your IDE's instructions to activate the virtualenv. + +Note that while we are installing dependencies from `requirements.txt`, you should only edit `setup.py` for your dependencies. `requirements.txt` is +used for editable installs (`pip install -e`) to pull in Python dependencies from the monorepo and will call `setup.py`. +If this is mumbo jumbo to you, don't worry about it, just put your deps in `setup.py` but install using `pip install -r requirements.txt` and everything +should work as you expect. 
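For illustration, `requirements.txt` in this connector contains only an editable install of the package itself, so the two commands below are effectively equivalent (the trailing comments are explanatory, not command output):
```
pip install -r requirements.txt   # requirements.txt contains just "-e ."
pip install -e .                  # editable install; reads setup.py and its MAIN_REQUIREMENTS
```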
+ +#### Create credentials +**If you are a community contributor**, follow the instructions in the [documentation](https://docs.airbyte.io/integrations/destinations/amazon-sqs) +to generate the necessary credentials. Then create a file `secrets/config.json` conforming to the `destination_amazon_sqs/spec.json` file. +Note that the `secrets` directory is gitignored by default, so there is no danger of accidentally checking in sensitive information. +See `integration_tests/sample_config.json` for a sample config file. + +**If you are an Airbyte core member**, copy the credentials in Lastpass under the secret name `destination amazon-sqs test creds` +and place them into `secrets/config.json`. + +### Locally running the connector +``` +python main.py spec +python main.py check --config secrets/config.json +python main.py discover --config secrets/config.json +python main.py read --config secrets/config.json --catalog integration_tests/configured_catalog.json +``` + +### Locally running the connector docker image + + +#### Build +**Via [`airbyte-ci`](https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/pipelines/README.md) (recommended):** +```bash +airbyte-ci connectors --name=destination-amazon-sqs build +``` + +An image will be built with the tag `airbyte/destination-amazon-sqs:dev`. + +**Via `docker build`:** +```bash +docker build -t airbyte/destination-amazon-sqs:dev . +``` + +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/destination-amazon-sqs:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-amazon-sqs:dev check --config /secrets/config.json +# messages.jsonl is a file containing line-separated JSON representing AirbyteMessages +cat messages.jsonl | docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/destination-amazon-sqs:dev write --config /secrets/config.json --catalog /integration_tests/configured_catalog.json +``` + +## Testing +You can run our full test suite locally using [`airbyte-ci`](https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/pipelines/README.md): +```bash +airbyte-ci connectors --name=destination-amazon-sqs test +``` + +### Customizing acceptance Tests +Customize `acceptance-test-config.yml` file to configure tests. See [Connector Acceptance Tests](https://docs.airbyte.com/connector-development/testing-connectors/connector-acceptance-tests-reference) for more information. +If your connector requires to create or destroy resources for use during acceptance tests create fixtures for it and place them inside integration_tests/acceptance.py. + +## Dependency Management +All of your dependencies should go in `setup.py`, NOT `requirements.txt`. The requirements file is only used to connect internal Airbyte dependencies in the monorepo for local development. +We split dependencies between two groups, dependencies that are: +* required for your connector to work need to go to `MAIN_REQUIREMENTS` list. +* required for the testing need to go to `TEST_REQUIREMENTS` list + +### Publishing a new version of the connector +You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what? +1. Make sure your changes are passing our test suite: `airbyte-ci connectors --name=destination-amazon-sqs test` +2. Bump the connector version in `metadata.yaml`: increment the `dockerImageTag` value. 
Please follow [semantic versioning for connectors](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#semantic-versioning-for-connectors). +3. Make sure the `metadata.yaml` content is up to date. +4. Make the connector documentation and its changelog is up to date (`docs/integrations/destinations/amazon-sqs.md`). +5. Create a Pull Request: use [our PR naming conventions](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#pull-request-title-convention). +6. Pat yourself on the back for being an awesome contributor. +7. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master. + diff --git a/airbyte-integrations/connectors/destination-amazon-sqs/bootstrap.md b/airbyte-integrations/connectors/destination-amazon-sqs/bootstrap.md new file mode 100644 index 0000000..ce91ec1 --- /dev/null +++ b/airbyte-integrations/connectors/destination-amazon-sqs/bootstrap.md @@ -0,0 +1,59 @@ +# Amazon SQS Destination + +## What +This is a connector for producing messages to an [Amazon SQS Queue](https://docs.aws.amazon.com/AWSSimpleQueueService/latest/SQSDeveloperGuide/welcome.html) + +## How +### Sending messages +Amazon SQS allows messages to be sent individually or in batches. Currently, this Destination only supports sending messages individually. This can +have performance implications if sending high volumes of messages. + +#### Message Body +By default, the SQS Message body is built using the AirbyteMessageRecord's 'data' property. + +If the **message_body_key** config item is set, we use the value as a key within the AirbyteMessageRecord's 'data' property. This could be +improved to handle nested keys by using JSONPath syntax to lookup values. + +For example, given the input Record: +``` +{ + "data": + { + "parent_key": { + "nested_key": "nested_value" + }, + "top_key": "top_value" + } +} +``` + +With no **message_body_key** set, the output SQS Message body will be +``` +{ + "parent_key": { + "nested_key": "nested_value" + }, + "top_key": "top_value" +} +``` + +With **message_body_key** set to `parent_key`, the output SQS Message body will be +``` +{ + "nested_key": "nested_value" +} +``` + +#### Message attributes +The airbyte_emmited_at timestamp is added to every message as an Attribute by default. This could be improved to allow the user to set Attributes through the UI, or to take keys from the Record as Attributes. + +#### FIFO Queues +A Queue URL that ends with '.fifo' **must** be a valid FIFO Queue. When the queue is FIFO, the *message_group_id* property is required. + +Currently, a unique uuid4 is generated as the dedupe ID for every message. This could be improved to allow the user to specify a path in the Record +to use as a dedupe ID. + +### Credentials +Requires an AWS IAM Access Key ID and Secret Key. + +This could be improved to add support for configured AWS profiles, env vars etc. diff --git a/airbyte-integrations/connectors/destination-amazon-sqs/destination_amazon_sqs/__init__.py b/airbyte-integrations/connectors/destination-amazon-sqs/destination_amazon_sqs/__init__.py new file mode 100644 index 0000000..ff5ba7b --- /dev/null +++ b/airbyte-integrations/connectors/destination-amazon-sqs/destination_amazon_sqs/__init__.py @@ -0,0 +1,8 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+# + + +from .destination import DestinationAmazonSqs + +__all__ = ["DestinationAmazonSqs"] diff --git a/airbyte-integrations/connectors/destination-amazon-sqs/destination_amazon_sqs/destination.py b/airbyte-integrations/connectors/destination-amazon-sqs/destination_amazon_sqs/destination.py new file mode 100644 index 0000000..1eb0249 --- /dev/null +++ b/airbyte-integrations/connectors/destination-amazon-sqs/destination_amazon_sqs/destination.py @@ -0,0 +1,176 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +import json +from typing import Any, Iterable, Mapping +from uuid import uuid4 + +import boto3 +from airbyte_cdk import AirbyteLogger +from airbyte_cdk.destinations import Destination +from airbyte_cdk.models import AirbyteConnectionStatus, AirbyteMessage, ConfiguredAirbyteCatalog, Status, Type +from botocore.exceptions import ClientError + + +class DestinationAmazonSqs(Destination): + def queue_is_fifo(self, url: str) -> bool: + return url.endswith(".fifo") + + def parse_queue_name(self, url: str) -> str: + return url.rsplit("/", 1)[-1] + + def send_single_message(self, queue, message) -> dict: + return queue.send_message(**message) + + def build_sqs_message(self, record, message_body_key=None): + data = None + if message_body_key: + data = record.data.get(message_body_key) + if data is None: + raise Exception("Message had no attribute of the configured Message Body Key: " + message_body_key) + else: + data = json.dumps(record.data) + + message = {"MessageBody": data} + + return message + + def add_attributes_to_message(self, record, message): + attributes = {"airbyte_emitted_at": {"StringValue": str(record.emitted_at), "DataType": "String"}} + message["MessageAttributes"] = attributes + return message + + def set_message_delay(self, message, message_delay): + message["DelaySeconds"] = message_delay + return message + + # MessageGroupID and MessageDeduplicationID are required properties for FIFO queues + # https://docs.aws.amazon.com/AWSSimpleQueueService/latest/APIReference/API_SendMessage.html + def set_message_fifo_properties(self, message, message_group_id, use_content_dedupe=False): + # https://docs.aws.amazon.com/AWSSimpleQueueService/latest/SQSDeveloperGuide/using-messagegroupid-property.html + if not message_group_id: + raise Exception("Failed to build message - Message Group ID is required for FIFO queues") + else: + message["MessageGroupId"] = message_group_id + # https://docs.aws.amazon.com/AWSSimpleQueueService/latest/SQSDeveloperGuide/using-messagededuplicationid-property.html + if not use_content_dedupe: + message["MessageDeduplicationId"] = str(uuid4()) + # TODO: Support getting MessageDeduplicationId from a key in the record + # if message_dedupe_id: + # message['MessageDeduplicationId'] = message_dedupe_id + return message + + # TODO: Support batch send + # def send_batch_messages(messages, queue): + # entry = { + # 'Id': "1", + # 'MessageBody': str(record.data), + # } + # response = queue.send_messages(Entries=messages) + # if 'Successful' in response: + # for status in response['Successful']: + # print("Message sent: " + status['MessageId']) + # if 'Failed' in response: + # for status in response['Failed']: + # print("Message sent: " + status['MessageId']) + + # https://docs.aws.amazon.com/AWSSimpleQueueService/latest/APIReference/API_SendMessage.html + def write( + self, config: Mapping[str, Any], configured_catalog: ConfiguredAirbyteCatalog, input_messages: Iterable[AirbyteMessage] + ) -> Iterable[AirbyteMessage]: + + # Required propeties + 
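        # For reference, a config passed to write() looks roughly like the following
        # (illustrative placeholders based on the examples in spec.json, not real credentials):
        # {
        #     "queue_url": "https://sqs.eu-west-1.amazonaws.com/1234567890/my-example-queue",
        #     "region": "eu-west-1",
        #     "access_key": "<AWS IAM access key id>",
        #     "secret_key": "<AWS IAM secret key>",
        #     "message_delay": 15,                  # optional
        #     "message_body_key": "myDataPath",     # optional
        #     "message_group_id": "my-fifo-group"   # required only for FIFO (.fifo) queues
        # }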
queue_url = config["queue_url"] + queue_region = config["region"] + + # TODO: Implement optional params for batch + # Optional Properties + # max_batch_size = config.get("max_batch_size", 10) + # send_as_batch = config.get("send_as_batch", False) + message_delay = config.get("message_delay") + message_body_key = config.get("message_body_key") + + # FIFO Properties + message_group_id = config.get("message_group_id") + + # Senstive Properties + access_key = config["access_key"] + secret_key = config["secret_key"] + + session = boto3.Session(aws_access_key_id=access_key, aws_secret_access_key=secret_key, region_name=queue_region) + sqs = session.resource("sqs") + queue = sqs.Queue(url=queue_url) + + # TODO: Make access/secret key optional, support public access & profiles + # TODO: Support adding/setting attributes in the UI + # TODO: Support extract a specific path as message attributes + + for message in input_messages: + if message.type == Type.RECORD: + sqs_message = self.build_sqs_message(message.record, message_body_key) + + if message_delay: + sqs_message = self.set_message_delay(sqs_message, message_delay) + + sqs_message = self.add_attributes_to_message(message.record, sqs_message) + + if self.queue_is_fifo(queue_url): + use_content_dedupe = False if queue.attributes.get("ContentBasedDeduplication") == "false" else "true" + self.set_message_fifo_properties(sqs_message, message_group_id, use_content_dedupe) + + self.send_single_message(queue, sqs_message) + if message.type == Type.STATE: + yield message + + def check(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> AirbyteConnectionStatus: + try: + # Required propeties + queue_url = config["queue_url"] + logger.debug("Amazon SQS Destination Config Check - queue_url: " + queue_url) + queue_region = config["region"] + logger.debug("Amazon SQS Destination Config Check - region: " + queue_region) + + # Senstive Properties + access_key = config["access_key"] + logger.debug("Amazon SQS Destination Config Check - access_key (ends with): " + access_key[-1]) + secret_key = config["secret_key"] + logger.debug("Amazon SQS Destination Config Check - secret_key (ends with): " + secret_key[-1]) + + logger.debug("Amazon SQS Destination Config Check - Starting connection test ---") + session = boto3.Session(aws_access_key_id=access_key, aws_secret_access_key=secret_key, region_name=queue_region) + sqs = session.resource("sqs") + queue = sqs.Queue(url=queue_url) + if hasattr(queue, "attributes"): + logger.debug("Amazon SQS Destination Config Check - Connection test successful ---") + + if self.queue_is_fifo(queue_url): + fifo = queue.attributes.get("FifoQueue", False) + if not fifo: + raise Exception("FIFO Queue URL set but Queue is not FIFO") + + message_group_id = config.get("message_group_id") + if message_group_id is None: + raise Exception("Message Group ID is not set, but is required for FIFO Queues.") + + # TODO: Support referencing an ID inside the Record to use as de-dupe ID + # message_dedupe_key = config.get("message_dedupe_key") + # content_dedupe = queue.attributes.get('ContentBasedDeduplication') + # if content_dedupe == "false": + # if message_dedupe_id is None: + # raise Exception("You must provide a Message Deduplication ID when ContentBasedDeduplication is not used.") + + return AirbyteConnectionStatus(status=Status.SUCCEEDED) + else: + return AirbyteConnectionStatus( + status=Status.FAILED, message="Amazon SQS Destination Config Check - Could not connect to queue" + ) + except ClientError as e: + return 
AirbyteConnectionStatus( + status=Status.FAILED, message=f"Amazon SQS Destination Config Check - Error in AWS Client: {str(e)}" + ) + except Exception as e: + return AirbyteConnectionStatus( + status=Status.FAILED, message=f"Amazon SQS Destination Config Check - An exception occurred: {str(e)}" + ) diff --git a/airbyte-integrations/connectors/destination-amazon-sqs/destination_amazon_sqs/spec.json b/airbyte-integrations/connectors/destination-amazon-sqs/destination_amazon_sqs/spec.json new file mode 100644 index 0000000..f94d7d0 --- /dev/null +++ b/airbyte-integrations/connectors/destination-amazon-sqs/destination_amazon_sqs/spec.json @@ -0,0 +1,101 @@ +{ + "documentationUrl": "https://docs.airbyte.com/integrations/destinations/amazon-sqs", + "supported_destination_sync_modes": ["append"], + "supportsIncremental": true, + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Destination Amazon Sqs", + "type": "object", + "required": ["queue_url", "region"], + "additionalProperties": false, + "properties": { + "queue_url": { + "title": "Queue URL", + "description": "URL of the SQS Queue", + "type": "string", + "examples": [ + "https://sqs.eu-west-1.amazonaws.com/1234567890/my-example-queue" + ], + "order": 0 + }, + "region": { + "title": "AWS Region", + "description": "AWS Region of the SQS Queue", + "type": "string", + "enum": [ + "af-south-1", + "ap-east-1", + "ap-northeast-1", + "ap-northeast-2", + "ap-northeast-3", + "ap-south-1", + "ap-south-2", + "ap-southeast-1", + "ap-southeast-2", + "ap-southeast-3", + "ap-southeast-4", + "ca-central-1", + "ca-west-1", + "cn-north-1", + "cn-northwest-1", + "eu-central-1", + "eu-central-2", + "eu-north-1", + "eu-south-1", + "eu-south-2", + "eu-west-1", + "eu-west-2", + "eu-west-3", + "il-central-1", + "me-central-1", + "me-south-1", + "sa-east-1", + "us-east-1", + "us-east-2", + "us-gov-east-1", + "us-gov-west-1", + "us-west-1", + "us-west-2" + ], + "order": 1 + }, + "message_delay": { + "title": "Message Delay", + "description": "Modify the Message Delay of the individual message from the Queue's default (seconds).", + "type": "integer", + "examples": ["15"], + "order": 2 + }, + "access_key": { + "title": "AWS IAM Access Key ID", + "description": "The Access Key ID of the AWS IAM Role to use for sending messages", + "type": "string", + "examples": ["xxxxxHRNxxx3TBxxxxxx"], + "order": 3, + "airbyte_secret": true + }, + "secret_key": { + "title": "AWS IAM Secret Key", + "description": "The Secret Key of the AWS IAM Role to use for sending messages", + "type": "string", + "examples": ["hu+qE5exxxxT6o/ZrKsxxxxxxBhxxXLexxxxxVKz"], + "order": 4, + "airbyte_secret": true + }, + "message_body_key": { + "title": "Message Body Key", + "description": "Use this property to extract the contents of the named key in the input record to use as the SQS message body. If not set, the entire content of the input record data is used as the message body.", + "type": "string", + "examples": ["myDataPath"], + "order": 5 + }, + "message_group_id": { + "title": "Message Group Id", + "description": "The tag that specifies that a message belongs to a specific message group. 
This parameter applies only to, and is REQUIRED by, FIFO queues.", + "type": "string", + "examples": ["my-fifo-group"], + "order": 6 + } + } + } +} diff --git a/airbyte-integrations/connectors/destination-amazon-sqs/icon.svg b/airbyte-integrations/connectors/destination-amazon-sqs/icon.svg new file mode 100644 index 0000000..6029b85 --- /dev/null +++ b/airbyte-integrations/connectors/destination-amazon-sqs/icon.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/airbyte-integrations/connectors/destination-amazon-sqs/integration_tests/input_records_json b/airbyte-integrations/connectors/destination-amazon-sqs/integration_tests/input_records_json new file mode 100644 index 0000000..b46977c --- /dev/null +++ b/airbyte-integrations/connectors/destination-amazon-sqs/integration_tests/input_records_json @@ -0,0 +1 @@ +{"type": "RECORD", "record": {"stream": "ab-airbyte-testing", "data": {"id": "ba0f237b-abf5-41ae-9d94-1dbd346f38dd", "body": "test 1", "attributes": null}, "emitted_at": 1633881878000}} \ No newline at end of file diff --git a/airbyte-integrations/connectors/destination-amazon-sqs/integration_tests/integration_test.py b/airbyte-integrations/connectors/destination-amazon-sqs/integration_tests/integration_test.py new file mode 100644 index 0000000..5d1e711 --- /dev/null +++ b/airbyte-integrations/connectors/destination-amazon-sqs/integration_tests/integration_test.py @@ -0,0 +1,46 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + +import json +from typing import Any, Mapping + +import pytest +from airbyte_cdk import AirbyteLogger +from airbyte_cdk.models import AirbyteStream, ConfiguredAirbyteCatalog, ConfiguredAirbyteStream, DestinationSyncMode, Status, SyncMode +from destination_amazon_sqs import DestinationAmazonSqs + + +@pytest.fixture(name="config") +def config_fixture() -> Mapping[str, Any]: + with open("secrets/config.json", "r") as f: + return json.loads(f.read()) + + +@pytest.fixture(name="configured_catalog") +def configured_catalog_fixture() -> ConfiguredAirbyteCatalog: + stream_schema = {"type": "object", "properties": {"string_col": {"type": "str"}, "int_col": {"type": "integer"}}} + + append_stream = ConfiguredAirbyteStream( + stream=AirbyteStream(name="append_stream", json_schema=stream_schema, supported_sync_modes=[SyncMode.incremental]), + sync_mode=SyncMode.incremental, + destination_sync_mode=DestinationSyncMode.append, + ) + + overwrite_stream = ConfiguredAirbyteStream( + stream=AirbyteStream(name="overwrite_stream", json_schema=stream_schema, supported_sync_modes=[SyncMode.incremental]), + sync_mode=SyncMode.incremental, + destination_sync_mode=DestinationSyncMode.overwrite, + ) + + return ConfiguredAirbyteCatalog(streams=[append_stream, overwrite_stream]) + + +def test_check_valid_config(config: Mapping): + outcome = DestinationAmazonSqs().check(AirbyteLogger(), config) + assert outcome.status == Status.SUCCEEDED + + +def test_check_invalid_config(): + outcome = DestinationAmazonSqs().check(AirbyteLogger(), {"secret_key": "not_a_real_secret"}) + assert outcome.status == Status.FAILED diff --git a/airbyte-integrations/connectors/destination-amazon-sqs/main.py b/airbyte-integrations/connectors/destination-amazon-sqs/main.py new file mode 100644 index 0000000..bc60769 --- /dev/null +++ b/airbyte-integrations/connectors/destination-amazon-sqs/main.py @@ -0,0 +1,11 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+# + + +import sys + +from destination_amazon_sqs import DestinationAmazonSqs + +if __name__ == "__main__": + DestinationAmazonSqs().run(sys.argv[1:]) diff --git a/airbyte-integrations/connectors/destination-amazon-sqs/metadata.yaml b/airbyte-integrations/connectors/destination-amazon-sqs/metadata.yaml new file mode 100644 index 0000000..3676f4f --- /dev/null +++ b/airbyte-integrations/connectors/destination-amazon-sqs/metadata.yaml @@ -0,0 +1,24 @@ +data: + connectorSubtype: api + connectorType: destination + definitionId: 0eeee7fb-518f-4045-bacc-9619e31c43ea + dockerImageTag: 0.1.1 + dockerRepository: airbyte/destination-amazon-sqs + githubIssueLabel: destination-amazon-sqs + icon: awssqs.svg + license: MIT + name: Amazon SQS + registries: + cloud: + enabled: false + oss: + enabled: false + releaseStage: alpha + documentationUrl: https://docs.airbyte.com/integrations/destinations/amazon-sqs + tags: + - language:python + ab_internal: + sl: 100 + ql: 200 + supportLevel: archived +metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/destination-amazon-sqs/requirements.txt b/airbyte-integrations/connectors/destination-amazon-sqs/requirements.txt new file mode 100644 index 0000000..d6e1198 --- /dev/null +++ b/airbyte-integrations/connectors/destination-amazon-sqs/requirements.txt @@ -0,0 +1 @@ +-e . diff --git a/airbyte-integrations/connectors/destination-amazon-sqs/sample_files/configured_catalog.json b/airbyte-integrations/connectors/destination-amazon-sqs/sample_files/configured_catalog.json new file mode 100644 index 0000000..ee132a2 --- /dev/null +++ b/airbyte-integrations/connectors/destination-amazon-sqs/sample_files/configured_catalog.json @@ -0,0 +1,27 @@ +{ + "streams": [ + { + "sync_mode": "full_refresh", + "destination_sync_mode": "append", + "stream": { + "name": "ab-airbyte-testing", + "supported_sync_modes": ["full_refresh"], + "source_defined_cursor": false, + "json_schema": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "body": { + "type": "string" + }, + "attributes": { + "type": ["null", "object"] + } + } + } + } + } + ] +} diff --git a/airbyte-integrations/connectors/destination-amazon-sqs/setup.py b/airbyte-integrations/connectors/destination-amazon-sqs/setup.py new file mode 100644 index 0000000..f1df000 --- /dev/null +++ b/airbyte-integrations/connectors/destination-amazon-sqs/setup.py @@ -0,0 +1,23 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +from setuptools import find_packages, setup + +MAIN_REQUIREMENTS = ["airbyte-cdk", "boto3"] + +TEST_REQUIREMENTS = ["pytest~=6.1", "moto"] + +setup( + name="destination_amazon_sqs", + description="Destination implementation for Amazon Sqs.", + author="Airbyte", + author_email="contact@airbyte.io", + packages=find_packages(), + install_requires=MAIN_REQUIREMENTS, + package_data={"": ["*.json"]}, + extras_require={ + "tests": TEST_REQUIREMENTS, + }, +) diff --git a/airbyte-integrations/connectors/destination-amazon-sqs/unit_tests/unit_test.py b/airbyte-integrations/connectors/destination-amazon-sqs/unit_tests/unit_test.py new file mode 100644 index 0000000..719671f --- /dev/null +++ b/airbyte-integrations/connectors/destination-amazon-sqs/unit_tests/unit_test.py @@ -0,0 +1,226 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
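# These tests run entirely against moto's in-memory AWS mocks (mock_iam / mock_sqs),
# so no real AWS account or credentials are needed. set_initial_no_auth_action_count
# lets the first few setup calls (creating the IAM user and its access key) run before
# authentication starts being enforced for the SQS calls that follow.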
+# + +import json +import time +from typing import Any, Mapping + +import boto3 +from airbyte_cdk.logger import AirbyteLogger +from airbyte_cdk.models import AirbyteMessage, ConfiguredAirbyteCatalog, Status +from destination_amazon_sqs import DestinationAmazonSqs + +# from airbyte_cdk.sources.source import Source +from moto import mock_iam, mock_sqs +from moto.core import set_initial_no_auth_action_count + + +@mock_iam +def create_user_with_all_permissions(): + client = boto3.client("iam", region_name="eu-west-1") + client.create_user(UserName="test_user1") + + policy_document = { + "Version": "2012-10-17", + "Statement": [{"Effect": "Allow", "Action": ["sqs:*"], "Resource": "*"}], + } + + client.put_user_policy( + UserName="test_user1", + PolicyName="policy1", + PolicyDocument=json.dumps(policy_document), + ) + + return client.create_access_key(UserName="test_user1")["AccessKey"] + + +def create_config(queue_url, queue_region, access_key, secret_key, message_delay): + return { + "queue_url": queue_url, + "region": queue_region, + "access_key": access_key, + "secret_key": secret_key, + "message_delay": message_delay, + } + + +def create_fifo_config(queue_url, queue_region, access_key, secret_key, message_group_id, message_delay): + return { + "queue_url": queue_url, + "region": queue_region, + "access_key": access_key, + "secret_key": secret_key, + "message_group_id": message_group_id, + "message_delay": message_delay, + } + + +def create_config_with_body_key(queue_url, queue_region, access_key, secret_key, message_body_key, message_delay): + return { + "queue_url": queue_url, + "region": queue_region, + "access_key": access_key, + "secret_key": secret_key, + "message_body_key": message_body_key, + "message_delay": message_delay, + } + + +def get_catalog() -> Mapping[str, Any]: + with open("sample_files/configured_catalog.json", "r") as f: + return json.load(f) + + +@set_initial_no_auth_action_count(3) +@mock_sqs +@mock_iam +def test_check(): + # Create User + user = create_user_with_all_permissions() + # Create Queue + queue_name = "amazon-sqs-mock-queue" + queue_region = "eu-west-1" + client = boto3.client( + "sqs", aws_access_key_id=user["AccessKeyId"], aws_secret_access_key=user["SecretAccessKey"], region_name=queue_region + ) + queue_url = client.create_queue(QueueName=queue_name)["QueueUrl"] + # Create config + config = create_config(queue_url, queue_region, user["AccessKeyId"], user["SecretAccessKey"], 10) + # Create AirbyteLogger + logger = AirbyteLogger() + # Create Destination + destination = DestinationAmazonSqs() + # Run check + status = destination.check(logger, config) + assert status.status == Status.SUCCEEDED + + # Create FIFO queue + fifo_queue_name = "amazon-sqs-mock-queue.fifo" + fif_queue_url = client.create_queue(QueueName=fifo_queue_name, Attributes={"FifoQueue": "true"})["QueueUrl"] + # Create config for FIFO + fifo_config = create_fifo_config(fif_queue_url, queue_region, user["AccessKeyId"], user["SecretAccessKey"], "fifo-group", 10) + # Run check + status = destination.check(logger, fifo_config) + assert status.status == Status.SUCCEEDED + + +@set_initial_no_auth_action_count(4) +@mock_sqs +@mock_iam +def test_write(): + # Create User + user = create_user_with_all_permissions() + + test_message = { + "type": "RECORD", + "record": { + "stream": "ab-airbyte-testing", + "data": {"id": "ba0f237b-abf5-41ae-9d94-1dbd346f38dd", "body": "test 1", "attributes": None}, + "emitted_at": 1633881878000, + }, + } + ab_message = AirbyteMessage(**test_message) + + # Common params + 
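    # message_delay maps to SQS DelaySeconds (how long a sent message stays invisible);
    # a small value is assumed here to keep the 20-second receive loops below from timing out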
message_delay = 1 + queue_region = "eu-west-1" + + # Standard Queue Test + print("## Starting standard queue test ##") + # Create Queue + queue_name = "amazon-sqs-mock-queue" + client = boto3.client( + "sqs", aws_access_key_id=user["AccessKeyId"], aws_secret_access_key=user["SecretAccessKey"], region_name=queue_region + ) + queue_url = client.create_queue(QueueName=queue_name)["QueueUrl"] + # Create config + config = create_config(queue_url, queue_region, user["AccessKeyId"], user["SecretAccessKey"], message_delay) + # Create ConfiguredAirbyteCatalog + catalog = ConfiguredAirbyteCatalog(streams=get_catalog()["streams"]) + # Create Destination + destination = DestinationAmazonSqs() + # Send messages using write() + for message in destination.write(config, catalog, [ab_message]): + print(f"Message Sent with delay of {message_delay} seconds") + # Listen for messages for max 20 seconds + timeout = time.time() + 20 + print("Listening for messages.") + while True: + message_received = client.receive_message(QueueUrl=queue_url) + if message_received.get("Messages"): + print("Message received.") + message_body = json.loads(message_received["Messages"][0]["Body"]) + # Compare the body of the received message, with the body of the message we sent + if message_body == test_message["record"]["data"]: + print("Received message matches for standard queue write.") + assert True + break + else: + continue + if time.time() > timeout: + print("Timed out waiting for message after 20 seconds.") + assert False + + # Standard Queue with a Message Key Test + print("## Starting body key queue test ##") + # Create Queue + key_queue_name = "amazon-sqs-mock-queue-key" + key_queue_url = client.create_queue(QueueName=key_queue_name)["QueueUrl"] + # Create config + message_body_key = "body" + key_config = create_config_with_body_key( + key_queue_url, queue_region, user["AccessKeyId"], user["SecretAccessKey"], message_body_key, message_delay + ) + # Send messages using write() + for message in destination.write(key_config, catalog, [ab_message]): + print(f"Message Sent with delay of {message_delay} seconds") + # Listen for messages for max 20 seconds + timeout = time.time() + 20 + print("Listening for messages.") + while True: + message_received = client.receive_message(QueueUrl=key_queue_url) + if message_received.get("Messages"): + print("Message received.") + message_body = message_received["Messages"][0]["Body"] + # Compare the body of the received message, with the body of the message we sent + if message_body == test_message["record"]["data"][message_body_key]: + print("Received message matches for body key queue write.") + assert True + break + else: + continue + if time.time() > timeout: + print("Timed out waiting for message after 20 seconds.") + assert False + + # FIFO Queue Test + print("## Starting FIFO queue test ##") + # Create Queue + fifo_queue_name = "amazon-sqs-mock-queue.fifo" + fifo_queue_url = client.create_queue(QueueName=fifo_queue_name, Attributes={"FifoQueue": "true"})["QueueUrl"] + # Create config + fifo_config = create_fifo_config( + fifo_queue_url, queue_region, user["AccessKeyId"], user["SecretAccessKey"], "fifo-group", message_delay + ) + # Send messages using write() + for message in destination.write(fifo_config, catalog, [ab_message]): + print(f"Message Sent with delay of {message_delay} seconds") + # Listen for messages for max 20 seconds + timeout = time.time() + 20 + print("Listening for messages.") + while True: + message_received = client.receive_message(QueueUrl=fifo_queue_url) + 
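        # "Messages" is only present in the receive_message response when at least one
        # message was returned, so keep polling until a match is found or the timeout expires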
if message_received.get("Messages"): + print("Message received.") + message_body = json.loads(message_received["Messages"][0]["Body"]) + # Compare the body of the received message, with the body of the message we sent + if message_body == test_message["record"]["data"]: + print("Received message matches for FIFO queue write.") + assert True + break + else: + continue + if time.time() > timeout: + print("Timed out waiting for message after 20 seconds.") + assert False diff --git a/airbyte-integrations/connectors/destination-cassandra/README.md b/airbyte-integrations/connectors/destination-cassandra/README.md new file mode 100644 index 0000000..21c6cde --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/README.md @@ -0,0 +1,72 @@ +# Destination Cassandra + +This is the repository for the Cassandra destination connector in Java. +For information about how to use this connector within Airbyte, see [the User Documentation](https://docs.airbyte.io/integrations/destinations/cassandra). + +## Local development + +#### Building via Gradle +From the Airbyte repository root, run: +``` +./gradlew :airbyte-integrations:connectors:destination-cassandra:build +``` + +#### Create credentials +**If you are a community contributor**, generate the necessary credentials and place them in `secrets/config.json` conforming to the spec file in `src/main/resources/spec.json`. +Note that the `secrets` directory is git-ignored by default, so there is no danger of accidentally checking in sensitive information. + +**If you are an Airbyte core member**, follow the [instructions](https://docs.airbyte.io/connector-development#using-credentials-in-ci) to set up the credentials. + +### Locally running the connector docker image + +#### Build +Build the connector image via Gradle: + +``` +./gradlew :airbyte-integrations:connectors:destination-cassandra:buildConnectorImage +``` +Once built, the docker image name and tag on your host will be `airbyte/destination-cassandra:dev`. +the Dockerfile. + +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/destination-cassandra:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-cassandra:dev check --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-cassandra:dev discover --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/destination-cassandra:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json +``` + +## Testing +We use `JUnit` for Java tests. + +### Unit and Integration Tests +Place unit tests under `src/test/io/airbyte/integrations/destinations/cassandra`. + +#### Acceptance Tests +Airbyte has a standard test suite that all destination connectors must pass. Implement the `TODO`s in +`src/test-integration/java/io/airbyte/integrations/destinations/cassandraDestinationAcceptanceTest.java`. + +### Using gradle to run tests +All commands should be run from airbyte project root. +To run unit tests: +``` +./gradlew :airbyte-integrations:connectors:destination-cassandra:unitTest +``` +To run acceptance and custom integration tests: +``` +./gradlew :airbyte-integrations:connectors:destination-cassandra:integrationTest +``` + +## Dependency Management + +### Publishing a new version of the connector +You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what? +1. 
Make sure your changes are passing our test suite: `airbyte-ci connectors --name=destination-cassandra test` +2. Bump the connector version in `metadata.yaml`: increment the `dockerImageTag` value. Please follow [semantic versioning for connectors](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#semantic-versioning-for-connectors). +3. Make sure the `metadata.yaml` content is up to date. +4. Make the connector documentation and its changelog is up to date (`docs/integrations/destinations/cassandra.md`). +5. Create a Pull Request: use [our PR naming conventions](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#pull-request-title-convention). +6. Pat yourself on the back for being an awesome contributor. +7. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master. + diff --git a/airbyte-integrations/connectors/destination-cassandra/bootstrap.md b/airbyte-integrations/connectors/destination-cassandra/bootstrap.md new file mode 100644 index 0000000..35c1942 --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/bootstrap.md @@ -0,0 +1,30 @@ +# Cassandra Destination + +Cassandra is a free and open-source, distributed, wide-column store, NoSQL database management system designed to handle +large amounts of data across many commodity servers, providing high availability with no single point of failure + +The data is structured in keyspaces and tables and is partitioned and replicated across different nodes in the +cluster. +[Read more about Cassandra](https://cassandra.apache.org/_/index.html) + +This connector maps an incoming `stream` to a Cassandra `table` and a `namespace` to a Cassandra`keyspace`. +When using destination sync mode `append` and `append_dedup`, an `insert` operation is performed against an existing +Cassandra table. +When using `overwrite`, the records are first placed in a temp table. When all the messages have been received the data +is copied to the final table which is first truncated and the temp table is deleted. + +The Implementation uses the [Datastax](https://github.com/datastax/java-driver) driver in order to access +Cassandra. [CassandraCqlProvider](./src/main/java/io/airbyte/integrations/destination/cassandra/CassandraCqlProvider.java) +handles the communication with the Cassandra cluster and internally it uses +the [SessionManager](./src/main/java/io/airbyte/integrations/destination/cassandra/SessionManager.java) to retrieve a +CqlSession to the cluster. + +The [CassandraMessageConsumer](./src/main/java/io/airbyte/integrations/destination/cassandra/CassandraMessageConsumer.java) +class contains the logic for handling airbyte messages, events and copying data between tables. + +## Development + +See the [CassandraCqlProvider](./src/main/java/io/airbyte/integrations/destination/cassandra/CassandraCqlProvider.java) +class on how to use the datastax driver. 
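For local testing, the `docker-compose.yml` bundled with this connector starts a single-node Cassandra 4.0 instance on port 9042 (a second node is included, commented out, for cluster testing). Assuming Docker Compose is available, run from this connector directory:
```
docker-compose up -d
```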
+ +[Datastax docs.](https://docs.datastax.com/en/developer/java-driver/3.0/) \ No newline at end of file diff --git a/airbyte-integrations/connectors/destination-cassandra/build.gradle b/airbyte-integrations/connectors/destination-cassandra/build.gradle new file mode 100644 index 0000000..b9774a9 --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/build.gradle @@ -0,0 +1,39 @@ +plugins { + id 'application' + id 'airbyte-java-connector' +} + +airbyteJavaConnector { + cdkVersionRequired = '0.2.0' + features = ['db-destinations'] + useLocalCdk = false +} + +//remove once upgrading the CDK version to 0.4.x or later +java { + compileJava { + options.compilerArgs.remove("-Werror") + } +} + +airbyteJavaConnector.addCdkDependencies() + +application { + mainClass = 'io.airbyte.integrations.destination.cassandra.CassandraDestination' + applicationDefaultJvmArgs = ['-XX:+ExitOnOutOfMemoryError', '-XX:MaxRAMPercentage=75.0'] +} + +def cassandraDriver = '4.13.0' +def assertVersion = '3.21.0' + +dependencies { + + implementation "com.datastax.oss:java-driver-core:${cassandraDriver}" + implementation "com.datastax.oss:java-driver-query-builder:${cassandraDriver}" + implementation "com.datastax.oss:java-driver-mapper-runtime:${cassandraDriver}" + + + // https://mvnrepository.com/artifact/org.assertj/assertj-core + testImplementation "org.assertj:assertj-core:${assertVersion}" + testImplementation libs.testcontainers.cassandra +} diff --git a/airbyte-integrations/connectors/destination-cassandra/docker-compose.yml b/airbyte-integrations/connectors/destination-cassandra/docker-compose.yml new file mode 100644 index 0000000..a4786dd --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/docker-compose.yml @@ -0,0 +1,23 @@ +version: "3.7" + +services: + cassandra1: + image: cassandra:4.0 + ports: + - "9042:9042" + environment: + - "MAX_HEAP_SIZE=2048M" + - "HEAP_NEWSIZE=1024M" + - "CASSANDRA_CLUSTER_NAME=cassandra_cluster" +# Uncomment if you want to run a Cassandra cluster +# cassandra2: +# image: cassandra:4.0 +# ports: +# - "9043:9042" +# environment: +# - "MAX_HEAP_SIZE=2048M" +# - "HEAP_NEWSIZE=1024M" +# - "CASSANDRA_SEEDS=cassandra1" +# - "CASSANDRA_CLUSTER_NAME=cassandra_cluster" +# depends_on: +# - cassandra1 diff --git a/airbyte-integrations/connectors/destination-cassandra/icon.svg b/airbyte-integrations/connectors/destination-cassandra/icon.svg new file mode 100644 index 0000000..26c12ef --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/icon.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/airbyte-integrations/connectors/destination-cassandra/metadata.yaml b/airbyte-integrations/connectors/destination-cassandra/metadata.yaml new file mode 100644 index 0000000..7b6e8b0 --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/metadata.yaml @@ -0,0 +1,24 @@ +data: + connectorSubtype: database + connectorType: destination + definitionId: 707456df-6f4f-4ced-b5c6-03f73bcad1c5 + dockerImageTag: 0.1.4 + dockerRepository: airbyte/destination-cassandra + githubIssueLabel: destination-cassandra + icon: cassandra.svg + license: MIT + name: Cassandra + registries: + cloud: + enabled: false + oss: + enabled: false + releaseStage: alpha + documentationUrl: https://docs.airbyte.com/integrations/destinations/cassandra + tags: + - language:java + ab_internal: + sl: 100 + ql: 100 + supportLevel: community +metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/destination-cassandra/sample_secrets/config.json 
b/airbyte-integrations/connectors/destination-cassandra/sample_secrets/config.json new file mode 100644 index 0000000..644fd54 --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/sample_secrets/config.json @@ -0,0 +1,4 @@ +{ + "username": "paste-username-here", + "password": "paste-password-here" +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraConfig.java b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraConfig.java new file mode 100644 index 0000000..5ea984f --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraConfig.java @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.cassandra; + +import com.fasterxml.jackson.databind.JsonNode; +import java.util.Objects; + +/* + * Immutable configuration class for storing cassandra related config. + */ +class CassandraConfig { + + private final String keyspace; + + private final String username; + + private final String password; + + private final String address; + + private final int port; + + private final String datacenter; + + private final int replication; + + public CassandraConfig(String keyspace, + String username, + String password, + String address, + int port, + String datacenter, + int replication) { + this.keyspace = keyspace; + this.username = username; + this.password = password; + this.address = address; + this.port = port; + this.datacenter = datacenter; + this.replication = replication; + } + + public CassandraConfig(JsonNode config) { + this.keyspace = config.get("keyspace").asText(); + this.username = config.get("username").asText(); + this.password = config.get("password").asText(); + this.address = config.get("address").asText(); + this.port = config.get("port").asInt(9042); + this.datacenter = config.get("datacenter").asText("datacenter1"); + this.replication = config.get("replication").asInt(1); + } + + public String getKeyspace() { + return keyspace; + } + + public String getUsername() { + return username; + } + + public String getPassword() { + return password; + } + + public String getAddress() { + return address; + } + + public int getPort() { + return port; + } + + public String getDatacenter() { + return datacenter; + } + + public int getReplication() { + return replication; + } + + @Override + public String toString() { + return "CassandraConfig{" + + "keyspace='" + keyspace + '\'' + + ", username='" + username + '\'' + + ", password='" + password + '\'' + + ", address='" + address + '\'' + + ", port=" + port + + ", datacenter='" + datacenter + '\'' + + ", replication=" + replication + + '}'; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + CassandraConfig that = (CassandraConfig) o; + return port == that.port && username.equals(that.username) && password.equals(that.password) && + address.equals(that.address) && datacenter.equals(that.datacenter); + } + + @Override + public int hashCode() { + return Objects.hash(username, password, address, port, datacenter); + } + +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraCqlProvider.java 
b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraCqlProvider.java new file mode 100644 index 0000000..0e48b8d --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraCqlProvider.java @@ -0,0 +1,180 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.cassandra; + +import static com.datastax.oss.driver.api.querybuilder.QueryBuilder.now; + +import com.datastax.oss.driver.api.core.CqlSession; +import com.datastax.oss.driver.api.core.cql.BoundStatement; +import com.datastax.oss.driver.api.core.cql.PreparedStatement; +import com.datastax.oss.driver.api.core.metadata.TokenMap; +import com.datastax.oss.driver.api.core.type.DataTypes; +import com.datastax.oss.driver.api.core.uuid.Uuids; +import com.datastax.oss.driver.api.querybuilder.QueryBuilder; +import com.datastax.oss.driver.api.querybuilder.SchemaBuilder; +import io.airbyte.cdk.integrations.base.JavaBaseConstants; +import java.io.Closeable; +import java.time.Instant; +import java.util.List; +import java.util.UUID; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.stream.Collectors; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +class CassandraCqlProvider implements Closeable { + + private static final Logger LOGGER = LoggerFactory.getLogger(CassandraCqlProvider.class); + + private static final int N_THREADS = Runtime.getRuntime().availableProcessors(); + + private final ExecutorService executorService; + + private final CqlSession cqlSession; + + private final CassandraConfig cassandraConfig; + + private final String columnId; + + private final String columnData; + + private final String columnTimestamp; + + public CassandraCqlProvider(CassandraConfig cassandraConfig) { + this.cassandraConfig = cassandraConfig; + this.cqlSession = SessionManager.initSession(cassandraConfig); + var nameTransformer = new CassandraNameTransformer(cassandraConfig); + this.columnId = nameTransformer.outputColumn(JavaBaseConstants.COLUMN_NAME_AB_ID); + this.columnData = nameTransformer.outputColumn(JavaBaseConstants.COLUMN_NAME_DATA); + this.columnTimestamp = nameTransformer.outputColumn(JavaBaseConstants.COLUMN_NAME_EMITTED_AT); + this.executorService = Executors.newFixedThreadPool(N_THREADS); + } + + public void createKeySpaceIfNotExists(String keyspace, int replicationFactor) { + var query = SchemaBuilder.createKeyspace(keyspace) + .ifNotExists() + .withSimpleStrategy(replicationFactor) + .build(); + cqlSession.execute(query); + } + + public void createTableIfNotExists(String keyspace, String tableName) { + var query = SchemaBuilder.createTable(keyspace, tableName) + .ifNotExists() + .withPartitionKey(columnId, DataTypes.UUID) + .withColumn(columnData, DataTypes.TEXT) + .withColumn(columnTimestamp, DataTypes.TIMESTAMP) + .build(); + cqlSession.execute(query); + } + + public void dropTableIfExists(String keyspace, String tableName) { + var query = SchemaBuilder.dropTable(keyspace, tableName) + .ifExists() + .build(); + cqlSession.execute(query); + } + + public void insert(String keyspace, String tableName, String jsonData) { + var query = QueryBuilder.insertInto(keyspace, tableName) + .value(columnId, QueryBuilder.literal(Uuids.random())) + .value(columnData, 
QueryBuilder.literal(jsonData)) + .value(columnTimestamp, QueryBuilder.toTimestamp(now())) + .build(); + cqlSession.execute(query); + } + + public void truncate(String keyspace, String tableName) { + var query = QueryBuilder.truncate(keyspace, tableName).build(); + cqlSession.execute(query); + } + + public List select(String keyspace, String tableName) { + var query = QueryBuilder.selectFrom(keyspace, tableName) + .columns(columnId, columnData, columnTimestamp) + .build(); + return cqlSession.execute(query) + .map(result -> new CassandraRecord( + result.get(columnId, UUID.class), + result.get(columnData, String.class), + result.get(columnTimestamp, Instant.class))) + .all(); + } + + public List>> retrieveMetadata() { + return cqlSession.getMetadata().getKeyspaces().values().stream() + .map(keyspace -> Tuple.of(keyspace.getName().toString(), keyspace.getTables().values() + .stream() + .map(table -> table.getName().toString()) + .collect(Collectors.toList()))) + .collect(Collectors.toList()); + } + + public void copy(String keyspace, String sourceTable, String destinationTable) { + var select = String.format("SELECT * FROM %s.%s WHERE token(%s) > ? AND token(%s) <= ?", + keyspace, sourceTable, columnId, columnId); + + var selectStatement = cqlSession.prepare(select); + + var insert = String.format("INSERT INTO %s.%s (%s, %s, %s) VALUES (?, ?, ?)", + keyspace, destinationTable, columnId, columnData, columnTimestamp); + + var insertStatement = cqlSession.prepare(insert); + + // perform full table scan in parallel using token ranges + // optimal for copying large amounts of data + cqlSession.getMetadata().getTokenMap() + .map(TokenMap::getTokenRanges) + .orElseThrow(IllegalStateException::new) + .stream() + .flatMap(range -> range.unwrap().stream()) + .map(range -> selectStatement.bind(range.getStart(), range.getEnd())) + // explore datastax 4.x async api as an alternative for async processing + .map(selectBoundStatement -> executorService.submit(() -> asyncInsert(selectBoundStatement, insertStatement))) + .forEach(this::awaitThread); + + } + + private void asyncInsert(BoundStatement select, PreparedStatement insert) { + var boundStatements = cqlSession.execute(select).all().stream() + .map(r -> CassandraRecord.of( + r.get(columnId, UUID.class), + r.get(columnData, String.class), + r.get(columnTimestamp, Instant.class))) + .map(r -> insert.bind(r.getId(), r.getData(), r.getTimestamp())).toList(); + + boundStatements.forEach(boundStatement -> { + var resultSetCompletionStage = cqlSession.executeAsync(boundStatement); + resultSetCompletionStage.whenCompleteAsync((res, err) -> { + if (err != null) { + LOGGER.error("Something went wrong during async insertion: " + err.getMessage()); + } + }); + }); + } + + private void awaitThread(Future future) { + try { + future.get(); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + LOGGER.error("Interrupted thread while copying data with reason: ", e); + } catch (ExecutionException e) { + LOGGER.error("Error while copying data with reason: ", e); + } + } + + @Override + public void close() { + // wait for tasks completion and terminate executor gracefully + executorService.shutdown(); + // close cassandra session for the given config + SessionManager.closeSession(cassandraConfig); + } + +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraDestination.java 
b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraDestination.java new file mode 100644 index 0000000..e2727ba --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraDestination.java @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.cassandra; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.cdk.integrations.BaseConnector; +import io.airbyte.cdk.integrations.base.AirbyteMessageConsumer; +import io.airbyte.cdk.integrations.base.Destination; +import io.airbyte.cdk.integrations.base.IntegrationRunner; +import io.airbyte.protocol.models.v0.AirbyteConnectionStatus; +import io.airbyte.protocol.models.v0.AirbyteMessage; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import java.util.UUID; +import java.util.function.Consumer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +class CassandraDestination extends BaseConnector implements Destination { + + private static final Logger LOGGER = LoggerFactory.getLogger(CassandraDestination.class); + + public static void main(String[] args) throws Exception { + new IntegrationRunner(new CassandraDestination()).run(args); + } + + @Override + public AirbyteConnectionStatus check(final JsonNode config) { + var cassandraConfig = new CassandraConfig(config); + // add random uuid to avoid conflicts with existing tables. + String tableName = "table_" + UUID.randomUUID().toString().replace("-", ""); + CassandraCqlProvider cassandraCqlProvider = null; + try { + cassandraCqlProvider = new CassandraCqlProvider(cassandraConfig); + // check connection and write permissions + cassandraCqlProvider.createKeySpaceIfNotExists(cassandraConfig.getKeyspace(), + cassandraConfig.getReplication()); + cassandraCqlProvider.createTableIfNotExists(cassandraConfig.getKeyspace(), tableName); + cassandraCqlProvider.insert(cassandraConfig.getKeyspace(), tableName, "{}"); + return new AirbyteConnectionStatus().withStatus(AirbyteConnectionStatus.Status.SUCCEEDED); + } catch (Exception e) { + LOGGER.error("Can't establish Cassandra connection with reason: ", e); + return new AirbyteConnectionStatus().withStatus(AirbyteConnectionStatus.Status.FAILED); + } finally { + if (cassandraCqlProvider != null) { + try { + cassandraCqlProvider.dropTableIfExists(cassandraConfig.getKeyspace(), tableName); + } catch (Exception e) { + LOGGER.error("Error while deleting temp table {} with reason: ", tableName, e); + } + cassandraCqlProvider.close(); + } + } + } + + @Override + public AirbyteMessageConsumer getConsumer(final JsonNode config, + final ConfiguredAirbyteCatalog configuredCatalog, + final Consumer outputRecordCollector) { + final CassandraConfig cassandraConfig = new CassandraConfig(config); + final CassandraCqlProvider cassandraCqlProvider = new CassandraCqlProvider(cassandraConfig); + return new CassandraMessageConsumer(cassandraConfig, configuredCatalog, cassandraCqlProvider, outputRecordCollector); + } + +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraMessageConsumer.java b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraMessageConsumer.java new file mode 100644 index 0000000..803cde8 --- /dev/null +++ 
b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraMessageConsumer.java @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.cassandra; + +import io.airbyte.cdk.integrations.base.FailureTrackingAirbyteMessageConsumer; +import io.airbyte.commons.json.Jsons; +import io.airbyte.protocol.models.v0.AirbyteMessage; +import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import java.util.Map; +import java.util.function.Consumer; +import java.util.stream.Collectors; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +class CassandraMessageConsumer extends FailureTrackingAirbyteMessageConsumer { + + private static final Logger LOGGER = LoggerFactory.getLogger(CassandraMessageConsumer.class); + + private final CassandraConfig cassandraConfig; + + private final Consumer outputRecordCollector; + + private final Map cassandraStreams; + + private final CassandraCqlProvider cassandraCqlProvider; + + public CassandraMessageConsumer(final CassandraConfig cassandraConfig, + final ConfiguredAirbyteCatalog configuredCatalog, + final CassandraCqlProvider provider, + final Consumer outputRecordCollector) { + this.cassandraConfig = cassandraConfig; + this.outputRecordCollector = outputRecordCollector; + this.cassandraCqlProvider = provider; + var nameTransformer = new CassandraNameTransformer(cassandraConfig); + this.cassandraStreams = configuredCatalog.getStreams().stream() + .collect(Collectors.toUnmodifiableMap( + AirbyteStreamNameNamespacePair::fromConfiguredAirbyteSteam, + k -> new CassandraStreamConfig( + nameTransformer.outputKeyspace(k.getStream().getNamespace()), + nameTransformer.outputTable(k.getStream().getName()), + nameTransformer.outputTmpTable(k.getStream().getName()), + k.getDestinationSyncMode()))); + } + + @Override + protected void startTracked() { + cassandraStreams.forEach((k, v) -> { + cassandraCqlProvider.createKeySpaceIfNotExists(v.getKeyspace(), cassandraConfig.getReplication()); + cassandraCqlProvider.createTableIfNotExists(v.getKeyspace(), v.getTempTableName()); + }); + } + + @Override + protected void acceptTracked(final AirbyteMessage message) { + if (message.getType() == AirbyteMessage.Type.RECORD) { + var messageRecord = message.getRecord(); + var streamConfig = + cassandraStreams.get(AirbyteStreamNameNamespacePair.fromRecordMessage(messageRecord)); + if (streamConfig == null) { + throw new IllegalArgumentException("Unrecognized destination stream"); + } + var data = Jsons.serialize(messageRecord.getData()); + cassandraCqlProvider.insert(streamConfig.getKeyspace(), streamConfig.getTempTableName(), data); + } else if (message.getType() == AirbyteMessage.Type.STATE) { + outputRecordCollector.accept(message); + } else { + LOGGER.warn("Unsupported airbyte message type: {}", message.getType()); + } + } + + @Override + protected void close(final boolean hasFailed) { + if (!hasFailed) { + cassandraStreams.forEach((k, v) -> { + try { + cassandraCqlProvider.createTableIfNotExists(v.getKeyspace(), v.getTableName()); + switch (v.getDestinationSyncMode()) { + case APPEND -> { + cassandraCqlProvider.copy(v.getKeyspace(), v.getTempTableName(), v.getTableName()); + } + case OVERWRITE -> { + cassandraCqlProvider.truncate(v.getKeyspace(), v.getTableName()); + cassandraCqlProvider.copy(v.getKeyspace(), v.getTempTableName(), v.getTableName()); + } + default -> 
throw new UnsupportedOperationException(); + } + } catch (final Exception e) { + LOGGER.error("Error while copying data to table {}: : ", v.getTableName(), e); + } + }); + } + + cassandraStreams.forEach((k, v) -> { + try { + cassandraCqlProvider.dropTableIfExists(v.getKeyspace(), v.getTempTableName()); + } catch (final Exception e) { + LOGGER.error("Error while deleting temp table {} with reason: ", v.getTempTableName(), e); + } + }); + cassandraCqlProvider.close(); + + } + +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraNameTransformer.java b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraNameTransformer.java new file mode 100644 index 0000000..da7f60b --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraNameTransformer.java @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.cassandra; + +import com.google.common.base.CharMatcher; +import io.airbyte.cdk.integrations.destination.StandardNameTransformer; +import io.airbyte.commons.text.Names; + +class CassandraNameTransformer extends StandardNameTransformer { + + private final CassandraConfig cassandraConfig; + + public CassandraNameTransformer(CassandraConfig cassandraConfig) { + this.cassandraConfig = cassandraConfig; + } + + String outputKeyspace(String namespace) { + if (namespace == null || namespace.isBlank()) { + return cassandraConfig.getKeyspace(); + } + return CharMatcher.is('_').trimLeadingFrom(Names.toAlphanumericAndUnderscore(namespace)); + } + + String outputTable(String streamName) { + var tableName = super.getRawTableName(streamName.toLowerCase()).substring(1); + // max allowed length for a cassandra table is 48 characters + return tableName.length() > 48 ? tableName.substring(0, 48) : tableName; + } + + String outputTmpTable(String streamName) { + var tableName = super.getTmpTableName(streamName.toLowerCase()).substring(1); + // max allowed length for a cassandra table is 48 characters + return tableName.length() > 48 ? tableName.substring(0, 48) : tableName; + } + + String outputColumn(String columnName) { + return Names.doubleQuote(columnName.toLowerCase()); + } + +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraRecord.java b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraRecord.java new file mode 100644 index 0000000..63af6d9 --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraRecord.java @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.cassandra; + +import java.time.Instant; +import java.util.UUID; + +class CassandraRecord { + + private final UUID id; + + private final String data; + + private final Instant timestamp; + + public CassandraRecord(UUID id, String data, Instant timestamp) { + this.id = id; + this.data = data; + this.timestamp = timestamp; + } + + static CassandraRecord of(UUID id, String data, Instant timestamp) { + return new CassandraRecord(id, data, timestamp); + } + + public UUID getId() { + return id; + } + + public String getData() { + return data; + } + + public Instant getTimestamp() { + return timestamp; + } + + @Override + public String toString() { + return "CassandraRecord{" + + "id=" + id + + ", data='" + data + '\'' + + ", timestamp=" + timestamp + + '}'; + } + +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraStreamConfig.java b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraStreamConfig.java new file mode 100644 index 0000000..dd7d85d --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraStreamConfig.java @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.cassandra; + +import io.airbyte.protocol.models.v0.DestinationSyncMode; + +/* + * Immutable configuration class for storing destination stream config. + */ +class CassandraStreamConfig { + + private final String keyspace; + + private final String tableName; + + private final String tempTableName; + + private final DestinationSyncMode destinationSyncMode; + + public CassandraStreamConfig(String keyspace, + String tableName, + String tempTableName, + DestinationSyncMode destinationSyncMode) { + this.keyspace = keyspace; + this.tableName = tableName; + this.tempTableName = tempTableName; + this.destinationSyncMode = destinationSyncMode; + } + + public String getKeyspace() { + return keyspace; + } + + public String getTableName() { + return tableName; + } + + public String getTempTableName() { + return tempTableName; + } + + public DestinationSyncMode getDestinationSyncMode() { + return destinationSyncMode; + } + + @Override + public String toString() { + return "CassandraStreamConfig{" + + "keyspace='" + keyspace + '\'' + + ", tableName='" + tableName + '\'' + + ", tempTableName='" + tempTableName + '\'' + + ", destinationSyncMode=" + destinationSyncMode + + '}'; + } + +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/SessionManager.java b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/SessionManager.java new file mode 100644 index 0000000..3837725 --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/SessionManager.java @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.cassandra; + +import com.datastax.oss.driver.api.core.CqlSession; +import java.net.InetSocketAddress; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicInteger; + +class SessionManager { + + // AtomicInteger is used for convenience, this class is not thread safe + // and needs additional synchronization for that. + private static final ConcurrentHashMap> sessions; + + static { + sessions = new ConcurrentHashMap<>(); + } + + private SessionManager() { + + } + + /* + * CqlSession objects are heavyweight and can hold several tcp connections to the Cassandra cluster, + * for that reason it is better if sessions are reused per configuration. Sessions are thread-safe + * and can be accessed from different threads. + * + */ + public static CqlSession initSession(CassandraConfig cassandraConfig) { + var cachedSession = sessions.get(cassandraConfig); + if (cachedSession != null) { + cachedSession.value2().incrementAndGet(); + return cachedSession.value1(); + } else { + var session = CqlSession.builder() + .withLocalDatacenter(cassandraConfig.getDatacenter()) + .addContactPoint(new InetSocketAddress(cassandraConfig.getAddress(), cassandraConfig.getPort())) + .withAuthCredentials(cassandraConfig.getUsername(), cassandraConfig.getPassword()) + .build(); + sessions.put(cassandraConfig, Tuple.of(session, new AtomicInteger(1))); + return session; + } + } + + /* + * Close session configured with cassandra config. if the session is being used by more than one + * external instance only decrease the usage count, otherwise close the session and remove it from + * the map. + * + */ + public static void closeSession(CassandraConfig cassandraConfig) { + var cachedSession = sessions.get(cassandraConfig); + if (cachedSession == null) { + throw new IllegalStateException("No session for the provided config"); + } + int count = cachedSession.value2().decrementAndGet(); + if (count < 1) { + cachedSession.value1().close(); + sessions.remove(cassandraConfig); + } + } + +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/Tuple.java b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/Tuple.java new file mode 100644 index 0000000..224f9b9 --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/Tuple.java @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.cassandra; + +public class Tuple { + + private final V1 value1; + + private final V2 value2; + + public Tuple(V1 value1, V2 value2) { + this.value1 = value1; + this.value2 = value2; + } + + public static Tuple of(V1 value1, V2 value2) { + return new Tuple<>(value1, value2); + } + + public V1 value1() { + return value1; + } + + public V2 value2() { + return value2; + } + + @Override + public String toString() { + return "Tuple{" + + "value1=" + value1 + + ", value2=" + value2 + + '}'; + } + +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-cassandra/src/main/resources/spec.json new file mode 100644 index 0000000..fac77fe --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/main/resources/spec.json @@ -0,0 +1,65 @@ +{ + "documentationUrl": "https://docs.airbyte.com/integrations/destinations/cassandra", + "supportsIncremental": true, + "supportsNormalization": false, + "supportsDBT": false, + "supported_destination_sync_modes": ["overwrite", "append"], + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Cassandra Destination Spec", + "type": "object", + "required": ["keyspace", "username", "password", "address", "port"], + "additionalProperties": true, + "properties": { + "keyspace": { + "title": "Keyspace", + "description": "Default Cassandra keyspace to create data in.", + "type": "string", + "order": 0 + }, + "username": { + "title": "Username", + "description": "Username to use to access Cassandra.", + "type": "string", + "order": 1 + }, + "password": { + "title": "Password", + "description": "Password associated with Cassandra.", + "type": "string", + "airbyte_secret": true, + "order": 2 + }, + "address": { + "title": "Address", + "description": "Address to connect to.", + "type": "string", + "examples": ["localhost,127.0.0.1"], + "order": 3 + }, + "port": { + "title": "Port", + "description": "Port of Cassandra.", + "type": "integer", + "minimum": 0, + "maximum": 65536, + "default": 9042, + "order": 4 + }, + "datacenter": { + "title": "Datacenter", + "description": "Datacenter of the cassandra cluster.", + "type": "string", + "default": "datacenter1", + "order": 5 + }, + "replication": { + "title": "Replication factor", + "type": "integer", + "description": "Indicates to how many nodes the data should be replicated to.", + "default": 1, + "order": 6 + } + } + } +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraContainerInitializr.java b/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraContainerInitializr.java new file mode 100644 index 0000000..76cb904 --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraContainerInitializr.java @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.cassandra; + +import org.testcontainers.containers.CassandraContainer; + +class CassandraContainerInitializr { + + private static ConfiguredCassandraContainer cassandraContainer; + + private CassandraContainerInitializr() { + + } + + public static ConfiguredCassandraContainer initContainer() { + if (cassandraContainer == null) { + cassandraContainer = new ConfiguredCassandraContainer(); + } + cassandraContainer.start(); + return cassandraContainer; + } + + public static class ConfiguredCassandraContainer extends CassandraContainer { + + ConfiguredCassandraContainer() { + // latest compatible version with the internal testcontainers datastax driver. + super("cassandra:3.11.11"); + } + + } + +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraCqlProviderIT.java b/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraCqlProviderIT.java new file mode 100644 index 0000000..9f0ebae --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraCqlProviderIT.java @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.cassandra; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import com.datastax.oss.driver.api.core.servererrors.InvalidQueryException; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; + +@TestInstance(TestInstance.Lifecycle.PER_CLASS) +class CassandraCqlProviderIT { + + private static final String CASSANDRA_KEYSPACE = "cassandra_keyspace"; + + private static final String CASSANDRA_TABLE = "cassandra_table"; + + private CassandraCqlProvider cassandraCqlProvider; + + private CassandraNameTransformer nameTransformer; + + @BeforeAll + void setup() { + var cassandraContainer = CassandraContainerInitializr.initContainer(); + var cassandraConfig = TestDataFactory.createCassandraConfig( + cassandraContainer.getUsername(), + cassandraContainer.getPassword(), + cassandraContainer.getHost(), + cassandraContainer.getFirstMappedPort()); + this.cassandraCqlProvider = new CassandraCqlProvider(cassandraConfig); + this.nameTransformer = new CassandraNameTransformer(cassandraConfig); + cassandraCqlProvider.createKeySpaceIfNotExists(CASSANDRA_KEYSPACE, 1); + cassandraCqlProvider.createTableIfNotExists(CASSANDRA_KEYSPACE, CASSANDRA_TABLE); + } + + @AfterEach + void clean() { + cassandraCqlProvider.truncate(CASSANDRA_KEYSPACE, CASSANDRA_TABLE); + } + + @Test + void testCreateKeySpaceIfNotExists() { + String keyspace = nameTransformer.outputKeyspace("test_keyspace"); + assertDoesNotThrow(() -> cassandraCqlProvider.createKeySpaceIfNotExists(keyspace, 1)); + } + + @Test + void testCreateTableIfNotExists() { + String table = nameTransformer.outputTable("test_stream"); + assertDoesNotThrow(() -> cassandraCqlProvider.createTableIfNotExists(CASSANDRA_KEYSPACE, table)); + } + + @Test + void testInsert() { + // given + cassandraCqlProvider.insert(CASSANDRA_KEYSPACE, CASSANDRA_TABLE, "{\"property\":\"data1\"}"); + cassandraCqlProvider.insert(CASSANDRA_KEYSPACE, 
CASSANDRA_TABLE, "{\"property\":\"data2\"}"); + cassandraCqlProvider.insert(CASSANDRA_KEYSPACE, CASSANDRA_TABLE, "{\"property\":\"data3\"}"); + + // when + var resultSet = cassandraCqlProvider.select(CASSANDRA_KEYSPACE, CASSANDRA_TABLE); + + // then + assertThat(resultSet) + .isNotNull() + .hasSize(3) + .anyMatch(r -> r.getData().equals("{\"property\":\"data1\"}")) + .anyMatch(r -> r.getData().equals("{\"property\":\"data2\"}")) + .anyMatch(r -> r.getData().equals("{\"property\":\"data3\"}")); + + } + + @Test + void testTruncate() { + // given + cassandraCqlProvider.insert(CASSANDRA_KEYSPACE, CASSANDRA_TABLE, "{\"property\":\"data1\"}"); + cassandraCqlProvider.insert(CASSANDRA_KEYSPACE, CASSANDRA_TABLE, "{\"property\":\"data2\"}"); + cassandraCqlProvider.insert(CASSANDRA_KEYSPACE, CASSANDRA_TABLE, "{\"property\":\"data3\"}"); + + // when + cassandraCqlProvider.truncate(CASSANDRA_KEYSPACE, CASSANDRA_TABLE); + var resultSet = cassandraCqlProvider.select(CASSANDRA_KEYSPACE, CASSANDRA_TABLE); + + // then + assertThat(resultSet) + .isNotNull() + .isEmpty(); + } + + @Test + void testDropTableIfExists() { + // given + String table = nameTransformer.outputTmpTable("test_stream"); + cassandraCqlProvider.createTableIfNotExists(CASSANDRA_KEYSPACE, table); + + // when + cassandraCqlProvider.dropTableIfExists(CASSANDRA_KEYSPACE, table); + + // then + assertThrows(InvalidQueryException.class, () -> cassandraCqlProvider.select(CASSANDRA_KEYSPACE, table)); + } + + @Test + void testCopy() { + // given + String tmpTable = nameTransformer.outputTmpTable("test_stream_copy"); + cassandraCqlProvider.createTableIfNotExists(CASSANDRA_KEYSPACE, tmpTable); + cassandraCqlProvider.insert(CASSANDRA_KEYSPACE, tmpTable, "{\"property\":\"data1\"}"); + cassandraCqlProvider.insert(CASSANDRA_KEYSPACE, tmpTable, "{\"property\":\"data2\"}"); + cassandraCqlProvider.insert(CASSANDRA_KEYSPACE, tmpTable, "{\"property\":\"data3\"}"); + + String rawTable = nameTransformer.outputTable("test_stream_copy"); + cassandraCqlProvider.createTableIfNotExists(CASSANDRA_KEYSPACE, rawTable); + + // when + cassandraCqlProvider.copy(CASSANDRA_KEYSPACE, tmpTable, rawTable); + var resultSet = cassandraCqlProvider.select(CASSANDRA_KEYSPACE, rawTable); + + // then + assertThat(resultSet) + .isNotNull() + .hasSize(3) + .anyMatch(r -> r.getData().equals("{\"property\":\"data1\"}")) + .anyMatch(r -> r.getData().equals("{\"property\":\"data2\"}")) + .anyMatch(r -> r.getData().equals("{\"property\":\"data3\"}")); + + } + +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraDestinationAcceptanceTest.java new file mode 100644 index 0000000..44c7bf0 --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraDestinationAcceptanceTest.java @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.cassandra; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.cdk.integrations.standardtest.destination.DestinationAcceptanceTest; +import io.airbyte.cdk.integrations.util.HostPortResolver; +import io.airbyte.commons.json.Jsons; +import java.util.Comparator; +import java.util.HashSet; +import java.util.List; +import java.util.stream.Collectors; +import org.junit.jupiter.api.BeforeAll; + +public class CassandraDestinationAcceptanceTest extends DestinationAcceptanceTest { + + private JsonNode configJson; + + private CassandraCqlProvider cassandraCqlProvider; + + private CassandraNameTransformer cassandraNameTransformer; + + private static CassandraContainerInitializr.ConfiguredCassandraContainer cassandraContainer; + + @BeforeAll + static void initContainer() { + cassandraContainer = CassandraContainerInitializr.initContainer(); + } + + @Override + protected void setup(final TestDestinationEnv testEnv, final HashSet TEST_SCHEMAS) { + configJson = TestDataFactory.createJsonConfig( + cassandraContainer.getUsername(), + cassandraContainer.getPassword(), + HostPortResolver.resolveHost(cassandraContainer), + HostPortResolver.resolvePort(cassandraContainer)); + final var cassandraConfig = new CassandraConfig(configJson); + cassandraCqlProvider = new CassandraCqlProvider(cassandraConfig); + cassandraNameTransformer = new CassandraNameTransformer(cassandraConfig); + } + + @Override + protected void tearDown(final TestDestinationEnv testEnv) { + cassandraCqlProvider.retrieveMetadata().forEach(meta -> { + final var keyspace = meta.value1(); + meta.value2().forEach(table -> cassandraCqlProvider.truncate(keyspace, table)); + }); + } + + @Override + protected String getImageName() { + return "airbyte/destination-cassandra:dev"; + } + + @Override + protected JsonNode getConfig() { + return configJson; + } + + @Override + protected boolean implementsNamespaces() { + return true; + } + + @Override + protected JsonNode getFailCheckConfig() { + return TestDataFactory.createJsonConfig( + "usr", + "pw", + "127.0.192.1", + 8080); + } + + @Override + protected List retrieveRecords(final TestDestinationEnv testEnv, + final String streamName, + final String namespace, + final JsonNode streamSchema) { + final var keyspace = cassandraNameTransformer.outputKeyspace(namespace); + final var table = cassandraNameTransformer.outputTable(streamName); + return cassandraCqlProvider.select(keyspace, table).stream() + .sorted(Comparator.comparing(CassandraRecord::getTimestamp)) + .map(CassandraRecord::getData) + .map(Jsons::deserialize) + .collect(Collectors.toList()); + } + +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraDestinationIT.java b/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraDestinationIT.java new file mode 100644 index 0000000..ea30e16 --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraDestinationIT.java @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.cassandra; + +import static org.assertj.core.api.Assertions.assertThat; + +import io.airbyte.integrations.destination.cassandra.CassandraContainerInitializr.ConfiguredCassandraContainer; +import io.airbyte.protocol.models.v0.AirbyteConnectionStatus; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; + +@TestInstance(TestInstance.Lifecycle.PER_CLASS) +class CassandraDestinationIT { + + private CassandraDestination cassandraDestination; + + private ConfiguredCassandraContainer cassandraContainer; + + @BeforeAll + void setup() { + this.cassandraContainer = CassandraContainerInitializr.initContainer(); + this.cassandraDestination = new CassandraDestination(); + } + + @Test + void testCheckWithStatusSucceeded() { + + var jsonConfiguration = TestDataFactory.createJsonConfig( + cassandraContainer.getUsername(), + cassandraContainer.getPassword(), + cassandraContainer.getHost(), + cassandraContainer.getFirstMappedPort()); + + var connectionStatus = cassandraDestination.check(jsonConfiguration); + + assertThat(connectionStatus.getStatus()).isEqualTo(AirbyteConnectionStatus.Status.SUCCEEDED); + } + + @Test + void testCheckWithStatusFailed() { + + var jsonConfiguration = TestDataFactory.createJsonConfig( + "usr", + "pw", + "192.0.2.1", + 8080); + + var connectionStatus = cassandraDestination.check(jsonConfiguration); + + assertThat(connectionStatus.getStatus()).isEqualTo(AirbyteConnectionStatus.Status.FAILED); + + } + +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraMessageConsumerIT.java b/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraMessageConsumerIT.java new file mode 100644 index 0000000..678301d --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraMessageConsumerIT.java @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.cassandra; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import io.airbyte.commons.json.Jsons; +import io.airbyte.protocol.models.v0.AirbyteMessage; +import io.airbyte.protocol.models.v0.DestinationSyncMode; +import java.util.function.Function; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.MethodOrderer.OrderAnnotation; +import org.junit.jupiter.api.Order; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; +import org.junit.jupiter.api.TestMethodOrder; + +@TestMethodOrder(OrderAnnotation.class) +@TestInstance(TestInstance.Lifecycle.PER_CLASS) +class CassandraMessageConsumerIT { + + private static final String AIRBYTE_NAMESPACE_1 = "airbyte_namespace_1"; + private static final String AIRBYTE_NAMESPACE_2 = "airbyte_namespace_2"; + + private static final String AIRBYTE_STREAM_1 = "airbyte_stream_1"; + private static final String AIRBYTE_STREAM_2 = "airbyte_stream_2"; + + private CassandraContainerInitializr.ConfiguredCassandraContainer cassandraContainer; + + private CassandraConfig cassandraConfig; + + private CassandraMessageConsumer cassandraMessageConsumer; + + private CassandraNameTransformer nameTransformer; + + @BeforeAll + void setup() { + cassandraContainer = CassandraContainerInitializr.initContainer(); + cassandraConfig = TestDataFactory.createCassandraConfig( + cassandraContainer.getUsername(), + cassandraContainer.getPassword(), + cassandraContainer.getHost(), + cassandraContainer.getFirstMappedPort()); + + final var stream1 = TestDataFactory.createAirbyteStream(AIRBYTE_STREAM_1, AIRBYTE_NAMESPACE_1); + final var stream2 = TestDataFactory.createAirbyteStream(AIRBYTE_STREAM_2, AIRBYTE_NAMESPACE_2); + + final var cStream1 = TestDataFactory.createConfiguredAirbyteStream(DestinationSyncMode.APPEND, stream1); + final var cStream2 = TestDataFactory.createConfiguredAirbyteStream(DestinationSyncMode.OVERWRITE, stream2); + + final var catalog = TestDataFactory.createConfiguredAirbyteCatalog(cStream1, cStream2); + + final CassandraCqlProvider cassandraCqlProvider = new CassandraCqlProvider(cassandraConfig); + cassandraMessageConsumer = new CassandraMessageConsumer(cassandraConfig, catalog, cassandraCqlProvider, message -> {}); + nameTransformer = new CassandraNameTransformer(cassandraConfig); + } + + @AfterAll + void close() { + cassandraContainer.close(); + } + + @Test + @Order(1) + void testStartTracked() { + assertDoesNotThrow(() -> cassandraMessageConsumer.startTracked()); + } + + @Test + @Order(2) + void testAcceptTracked() { + + final Function function = + data -> Jsons.jsonNode(ImmutableMap.builder().put("property", data).build()); + + assertDoesNotThrow(() -> { + cassandraMessageConsumer.acceptTracked( + TestDataFactory.createAirbyteMessage(AirbyteMessage.Type.RECORD, AIRBYTE_STREAM_1, AIRBYTE_NAMESPACE_1, + function.apply("data1"))); + cassandraMessageConsumer.acceptTracked( + TestDataFactory.createAirbyteMessage(AirbyteMessage.Type.RECORD, AIRBYTE_STREAM_1, AIRBYTE_NAMESPACE_1, + function.apply("data2"))); + cassandraMessageConsumer.acceptTracked( + TestDataFactory.createAirbyteMessage(AirbyteMessage.Type.RECORD, AIRBYTE_STREAM_2, AIRBYTE_NAMESPACE_2, + function.apply("data3"))); + cassandraMessageConsumer.acceptTracked( + 
TestDataFactory.createAirbyteMessage(AirbyteMessage.Type.RECORD, AIRBYTE_STREAM_2, AIRBYTE_NAMESPACE_2, + function.apply("data4"))); + cassandraMessageConsumer.acceptTracked( + TestDataFactory.createAirbyteMessage(AirbyteMessage.Type.STATE, AIRBYTE_STREAM_2, AIRBYTE_NAMESPACE_2, + function.apply("data5"))); + }); + + } + + @Test + @Order(3) + void testClose() { + + assertDoesNotThrow(() -> cassandraMessageConsumer.close(false)); + + } + + @Test + @Order(4) + void testFinalState() { + final var keyspace1 = nameTransformer.outputKeyspace(AIRBYTE_NAMESPACE_1); + final var keyspace2 = nameTransformer.outputKeyspace(AIRBYTE_NAMESPACE_2); + final var table1 = nameTransformer.outputTable(AIRBYTE_STREAM_1); + final var table2 = nameTransformer.outputTable(AIRBYTE_STREAM_2); + try (final var cassandraCqlProvider = new CassandraCqlProvider(cassandraConfig)) { + final var resultSet1 = cassandraCqlProvider.select(keyspace1, table1); + final var resultSet2 = cassandraCqlProvider.select(keyspace2, table2); + assertThat(resultSet1) + .isNotNull() + .hasSize(2) + .anyMatch(r -> r.getData().equals("{\"property\":\"data1\"}")) + .anyMatch(r -> r.getData().equals("{\"property\":\"data2\"}")); + + assertThat(resultSet2) + .isNotNull() + .hasSize(2) + .anyMatch(r -> r.getData().equals("{\"property\":\"data3\"}")) + .anyMatch(r -> r.getData().equals("{\"property\":\"data4\"}")); + } + } + +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/TestDataFactory.java b/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/TestDataFactory.java new file mode 100644 index 0000000..da3af9e --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/TestDataFactory.java @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.cassandra; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; +import io.airbyte.commons.json.Jsons; +import io.airbyte.protocol.models.v0.AirbyteMessage; +import io.airbyte.protocol.models.v0.AirbyteRecordMessage; +import io.airbyte.protocol.models.v0.AirbyteStream; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream; +import io.airbyte.protocol.models.v0.DestinationSyncMode; +import io.airbyte.protocol.models.v0.SyncMode; +import java.time.Instant; +import java.util.List; + +public class TestDataFactory { + + private TestDataFactory() { + + } + + static CassandraConfig createCassandraConfig(String username, String password, String address, int port) { + return new CassandraConfig( + "default_keyspace", + username, + password, + address, + port, + "datacenter1", + 1); + } + + static JsonNode createJsonConfig(String username, String password, String address, int port) { + return Jsons.jsonNode(ImmutableMap.builder() + .put("keyspace", "default_keyspace") + .put("username", username) + .put("password", password) + .put("address", address) + .put("port", port) + .put("datacenter", "datacenter1") + .put("replication", 1) + .build()); + } + + static AirbyteMessage createAirbyteMessage(AirbyteMessage.Type type, + String streamName, + String namespace, + JsonNode data) { + return new AirbyteMessage() + .withType(type) + .withRecord(new AirbyteRecordMessage() + .withStream(streamName) + .withNamespace(namespace) + .withData(data) + .withEmittedAt(Instant.now().toEpochMilli())); + } + + static AirbyteStream createAirbyteStream(String name, String namespace) { + return new AirbyteStream() + .withName(name) + .withNamespace(namespace) + .withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH)); + } + + static ConfiguredAirbyteStream createConfiguredAirbyteStream(DestinationSyncMode syncMode, AirbyteStream stream) { + return new ConfiguredAirbyteStream() + .withDestinationSyncMode(syncMode) + .withStream(stream); + } + + static ConfiguredAirbyteCatalog createConfiguredAirbyteCatalog(ConfiguredAirbyteStream... configuredStreams) { + return new ConfiguredAirbyteCatalog().withStreams(List.of(configuredStreams)); + } + +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/test/java/io/airbyte/integrations/destination/cassandra/CassandraConfigTest.java b/airbyte-integrations/connectors/destination-cassandra/src/test/java/io/airbyte/integrations/destination/cassandra/CassandraConfigTest.java new file mode 100644 index 0000000..c425481 --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/test/java/io/airbyte/integrations/destination/cassandra/CassandraConfigTest.java @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.cassandra; + +import static org.assertj.core.api.Assertions.assertThat; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +class CassandraConfigTest { + + private CassandraConfig cassandraConfig; + + @BeforeEach + void setup() { + var jsonNode = TestDataFactory.createJsonConfig( + "usr", + "pw", + "127.0.0.1", + 9042); + this.cassandraConfig = new CassandraConfig(jsonNode); + } + + @Test + void testConfig() { + + assertThat(cassandraConfig) + .hasFieldOrPropertyWithValue("keyspace", "default_keyspace") + .hasFieldOrPropertyWithValue("username", "usr") + .hasFieldOrPropertyWithValue("password", "pw") + .hasFieldOrPropertyWithValue("address", "127.0.0.1") + .hasFieldOrPropertyWithValue("port", 9042) + .hasFieldOrPropertyWithValue("datacenter", "datacenter1") + .hasFieldOrPropertyWithValue("replication", 1); + + } + +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/test/java/io/airbyte/integrations/destination/cassandra/CassandraNameTransformerTest.java b/airbyte-integrations/connectors/destination-cassandra/src/test/java/io/airbyte/integrations/destination/cassandra/CassandraNameTransformerTest.java new file mode 100644 index 0000000..6922de7 --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/test/java/io/airbyte/integrations/destination/cassandra/CassandraNameTransformerTest.java @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.cassandra; + +import static org.assertj.core.api.Assertions.assertThat; + +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; + +@TestInstance(TestInstance.Lifecycle.PER_CLASS) +class CassandraNameTransformerTest { + + private CassandraNameTransformer cassandraNameTransformer; + + @BeforeAll + void setup() { + var cassandraConfig = TestDataFactory.createCassandraConfig( + "usr", + "pw", + "127.0.0.1", + 9042); + this.cassandraNameTransformer = new CassandraNameTransformer(cassandraConfig); + } + + @Test + void testOutputTable() { + + var table = cassandraNameTransformer.outputTable("stream_name"); + + assertThat(table).matches("airbyte_raw_stream_name"); + + } + + @Test + void testOutputTmpTable() { + + var table = cassandraNameTransformer.outputTmpTable("stream_name"); + + assertThat(table).matches("airbyte_tmp_+[a-z]+_stream_name"); + + } + + @Test + void testOutputKeyspace() { + + var keyspace = cassandraNameTransformer.outputKeyspace("***keyspace^h"); + + assertThat(keyspace).matches("keyspace_h"); + + } + + @Test + void outputColumn() { + + var column = cassandraNameTransformer.outputColumn("_airbyte_data"); + + assertThat(column).matches("\"_airbyte_data\""); + + } + +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/test/java/io/airbyte/integrations/destination/cassandra/CassandraRecordConsumerTest.java b/airbyte-integrations/connectors/destination-cassandra/src/test/java/io/airbyte/integrations/destination/cassandra/CassandraRecordConsumerTest.java new file mode 100644 index 0000000..dc35e4b --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/test/java/io/airbyte/integrations/destination/cassandra/CassandraRecordConsumerTest.java @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.cassandra; + +import io.airbyte.cdk.integrations.base.FailureTrackingAirbyteMessageConsumer; +import io.airbyte.cdk.integrations.standardtest.destination.PerStreamStateMessageTest; +import io.airbyte.protocol.models.v0.AirbyteMessage; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import java.util.function.Consumer; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; + +@ExtendWith(MockitoExtension.class) +public class CassandraRecordConsumerTest extends PerStreamStateMessageTest { + + @Mock + private Consumer outputRecordCollector; + + @InjectMocks + private CassandraMessageConsumer consumer; + @Mock + private CassandraConfig config; + @Mock + private ConfiguredAirbyteCatalog catalog; + @Mock + private CassandraCqlProvider provider; + + @BeforeEach + public void init() { + consumer = new CassandraMessageConsumer(config, catalog, provider, outputRecordCollector); + } + + @Override + protected Consumer getMockedConsumer() { + return outputRecordCollector; + } + + @Override + protected FailureTrackingAirbyteMessageConsumer getMessageConsumer() { + return consumer; + } + +} diff --git a/airbyte-integrations/connectors/destination-cassandra/src/test/java/io/airbyte/integrations/destination/cassandra/TestDataFactory.java b/airbyte-integrations/connectors/destination-cassandra/src/test/java/io/airbyte/integrations/destination/cassandra/TestDataFactory.java new file mode 100644 index 0000000..da3af9e --- /dev/null +++ b/airbyte-integrations/connectors/destination-cassandra/src/test/java/io/airbyte/integrations/destination/cassandra/TestDataFactory.java @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.cassandra; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; +import io.airbyte.commons.json.Jsons; +import io.airbyte.protocol.models.v0.AirbyteMessage; +import io.airbyte.protocol.models.v0.AirbyteRecordMessage; +import io.airbyte.protocol.models.v0.AirbyteStream; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream; +import io.airbyte.protocol.models.v0.DestinationSyncMode; +import io.airbyte.protocol.models.v0.SyncMode; +import java.time.Instant; +import java.util.List; + +public class TestDataFactory { + + private TestDataFactory() { + + } + + static CassandraConfig createCassandraConfig(String username, String password, String address, int port) { + return new CassandraConfig( + "default_keyspace", + username, + password, + address, + port, + "datacenter1", + 1); + } + + static JsonNode createJsonConfig(String username, String password, String address, int port) { + return Jsons.jsonNode(ImmutableMap.builder() + .put("keyspace", "default_keyspace") + .put("username", username) + .put("password", password) + .put("address", address) + .put("port", port) + .put("datacenter", "datacenter1") + .put("replication", 1) + .build()); + } + + static AirbyteMessage createAirbyteMessage(AirbyteMessage.Type type, + String streamName, + String namespace, + JsonNode data) { + return new AirbyteMessage() + .withType(type) + .withRecord(new AirbyteRecordMessage() + .withStream(streamName) + .withNamespace(namespace) + .withData(data) + .withEmittedAt(Instant.now().toEpochMilli())); + } + + static AirbyteStream createAirbyteStream(String name, String namespace) { + return new AirbyteStream() + .withName(name) + .withNamespace(namespace) + .withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH)); + } + + static ConfiguredAirbyteStream createConfiguredAirbyteStream(DestinationSyncMode syncMode, AirbyteStream stream) { + return new ConfiguredAirbyteStream() + .withDestinationSyncMode(syncMode) + .withStream(stream); + } + + static ConfiguredAirbyteCatalog createConfiguredAirbyteCatalog(ConfiguredAirbyteStream... configuredStreams) { + return new ConfiguredAirbyteCatalog().withStreams(List.of(configuredStreams)); + } + +} diff --git a/airbyte-integrations/connectors/destination-cumulio/Dockerfile b/airbyte-integrations/connectors/destination-cumulio/Dockerfile new file mode 100644 index 0000000..90e3f08 --- /dev/null +++ b/airbyte-integrations/connectors/destination-cumulio/Dockerfile @@ -0,0 +1,42 @@ +FROM python:3.9.11 as base +# FROM python:3.9.11-alpine3.15 as base +# switched from alpine as there were tons of errors (in case you want to switch back to alpine) +# - https://stackoverflow.com/a/57485724/5246670 +# - numpy error: https://stackoverflow.com/a/22411624/5246670 +# - libstdc++ https://github.com/amancevice/docker-pandas/issues/12#issuecomment-717215043 +# - musl-dev linux-headers g++ because of: https://stackoverflow.com/a/40407099/5246670 + +# build and load all requirements +FROM base as builder +WORKDIR /airbyte/integration_code + +# upgrade pip to the latest version +RUN apt-get update && apt-get -y upgrade \ + && pip install --upgrade pip + +COPY setup.py ./ +# install necessary packages to a temporary folder +RUN pip install --prefix=/install . 
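# note: packages are staged under /install in this builder stage so that the runtime
# stage below can copy only the resulting installed files into /usr/local (see the
# COPY --from=builder step), keeping the pip build context out of the final image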
+# build a clean environment +FROM base +# RUN conda install -c conda-forge python-duckdb +WORKDIR /airbyte/integration_code + +# copy all loaded and built libraries to a pure basic image +COPY --from=builder /install /usr/local +# add default timezone settings +COPY --from=builder /usr/share/zoneinfo/Etc/UTC /etc/localtime +RUN echo "Etc/UTC" > /etc/timezone + +#adding duckdb manually (outside of setup.py - lots of errors) +RUN pip install duckdb + +# copy payload code only +COPY main.py ./ +COPY destination_cumulio ./destination_cumulio + +ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" +ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] + +LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.name=airbyte/destination-cumulio diff --git a/airbyte-integrations/connectors/destination-cumulio/README.md b/airbyte-integrations/connectors/destination-cumulio/README.md new file mode 100644 index 0000000..6226110 --- /dev/null +++ b/airbyte-integrations/connectors/destination-cumulio/README.md @@ -0,0 +1,98 @@ +# Cumulio Destination + +This is the repository for the Cumulio destination connector, written in Python. +For information about how to use this connector within Airbyte, see [the documentation](https://docs.airbyte.com/integrations/destinations/cumulio). + +## Local development + +### Prerequisites +**To iterate on this connector, make sure to complete this prerequisites section.** + +#### Minimum Python version required `= 3.7.0` + +#### Build & Activate Virtual Environment and install dependencies +From this connector directory, create a virtual environment: +``` +python -m venv .venv +``` + +This will generate a virtualenv for this module in `.venv/`. Make sure this venv is active in your +development environment of choice. To activate it from the terminal, run: +``` +source .venv/bin/activate +pip install -r requirements.txt +``` +If you are in an IDE, follow your IDE's instructions to activate the virtualenv. + +Note that while we are installing dependencies from `requirements.txt`, you should only edit `setup.py` for your dependencies. `requirements.txt` is +used for editable installs (`pip install -e`) to pull in Python dependencies from the monorepo and will call `setup.py`. +If this is mumbo jumbo to you, don't worry about it, just put your deps in `setup.py` but install using `pip install -r requirements.txt` and everything +should work as you expect. + +#### Create credentials +**If you are a community contributor**, follow the instructions in the [documentation](https://docs.airbyte.com/integrations/destinations/cumulio) +to generate the necessary credentials. Then create a file `secrets/config.json` conforming to the `destination_cumulio/spec.json` file. +Note that the `secrets` directory is gitignored by default, so there is no danger of accidentally checking in sensitive information. +See `integration_tests/sample_config.json` for a sample config file. + +**If you are an Airbyte core member**, copy the credentials in Lastpass under the secret name `destination cumulio test creds` +and place them into `secrets/config.json`. 
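For illustration, the connector's check/write code reads the Cumul.io API host, key and token from this config (see `destination_cumulio/client.py`). A minimal `secrets/config.json` sketch, with placeholder values and field names inferred from that code rather than quoted from the authoritative `destination_cumulio/spec.json`, could look like:
```
{
  "api_host": "<your Cumul.io API host, e.g. https://api.cumul.io>",
  "api_key": "<your Cumul.io API key>",
  "api_token": "<your Cumul.io API token>"
}
```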
+ +### Locally running the connector +``` +python main.py spec +python main.py check --config secrets/config.json +python main.py write --config secrets/config.json --catalog integration_tests/configured_catalog.json +``` + +### Locally running the connector docker image + + +#### Build +**Via [`airbyte-ci`](https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/pipelines/README.md) (recommended):** +```bash +airbyte-ci connectors --name=destination-cumulio build +``` + +An image will be built with the tag `airbyte/destination-cumulio:dev`. + +**Via `docker build`:** +```bash +docker build -t airbyte/destination-cumulio:dev . +``` + +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/destination-cumulio:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-cumulio:dev check --config /secrets/config.json +# messages.jsonl is a file containing line-separated JSON representing AirbyteMessages +cat messages.jsonl | docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/destination-cumulio:dev write --config /secrets/config.json --catalog /integration_tests/configured_catalog.json +``` + +## Testing +You can run our full test suite locally using [`airbyte-ci`](https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/pipelines/README.md): +```bash +airbyte-ci connectors --name=destination-cumulio test +``` + +### Customizing acceptance Tests +Customize `acceptance-test-config.yml` file to configure tests. See [Connector Acceptance Tests](https://docs.airbyte.com/connector-development/testing-connectors/connector-acceptance-tests-reference) for more information. +If your connector requires to create or destroy resources for use during acceptance tests create fixtures for it and place them inside integration_tests/acceptance.py. + +## Dependency Management +All of your dependencies should go in `setup.py`, NOT `requirements.txt`. The requirements file is only used to connect internal Airbyte dependencies in the monorepo for local development. +We split dependencies between two groups, dependencies that are: +* required for your connector to work need to go to `MAIN_REQUIREMENTS` list. +* required for the testing need to go to `TEST_REQUIREMENTS` list + +### Publishing a new version of the connector +You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what? +1. Make sure your changes are passing our test suite: `airbyte-ci connectors --name=destination-cumulio test` +2. Bump the connector version in `metadata.yaml`: increment the `dockerImageTag` value. Please follow [semantic versioning for connectors](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#semantic-versioning-for-connectors). +3. Make sure the `metadata.yaml` content is up to date. +4. Make the connector documentation and its changelog is up to date (`docs/integrations/destinations/cumulio.md`). +5. Create a Pull Request: use [our PR naming conventions](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#pull-request-title-convention). +6. Pat yourself on the back for being an awesome contributor. +7. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master. 
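To make the dependency split described in the Dependency Management section above concrete, a minimal `setup.py` for a Python destination connector typically declares the two groups roughly as follows. This is an illustrative sketch only: the package names and versions are assumptions (the `cumulio` client library is inferred from the import in `destination_cumulio/client.py`), not this connector's actual pinned dependencies.

```python
from setuptools import find_packages, setup

# Runtime dependencies of the connector (illustrative).
MAIN_REQUIREMENTS = [
    "airbyte-cdk",  # Airbyte connector base classes and protocol models
    "cumulio",      # Cumul.io API client imported by destination_cumulio/client.py
]

# Dependencies needed only for running the tests (illustrative).
TEST_REQUIREMENTS = [
    "pytest~=6.2",
]

setup(
    name="destination_cumulio",
    description="Destination implementation for Cumul.io.",
    author="Airbyte",
    packages=find_packages(),
    install_requires=MAIN_REQUIREMENTS,
    extras_require={"tests": TEST_REQUIREMENTS},
)
```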
+ diff --git a/airbyte-integrations/connectors/destination-cumulio/destination_cumulio/__init__.py b/airbyte-integrations/connectors/destination-cumulio/destination_cumulio/__init__.py new file mode 100644 index 0000000..5dda7de --- /dev/null +++ b/airbyte-integrations/connectors/destination-cumulio/destination_cumulio/__init__.py @@ -0,0 +1,8 @@ +# +# Copyright (c) 2022 Airbyte, Inc., all rights reserved. +# + + +from .destination import DestinationCumulio + +__all__ = ["DestinationCumulio"] diff --git a/airbyte-integrations/connectors/destination-cumulio/destination_cumulio/client.py b/airbyte-integrations/connectors/destination-cumulio/destination_cumulio/client.py new file mode 100644 index 0000000..10728e3 --- /dev/null +++ b/airbyte-integrations/connectors/destination-cumulio/destination_cumulio/client.py @@ -0,0 +1,367 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +import time +from logging import Logger +from typing import Any, Mapping + +from cumulio.cumulio import Cumulio # type: ignore + +# def _retry_with_backoff( +# fn: Callable, +# backoff_times_in_seconds: list[int] +# ): +# while True: +# try: +# return fn() + + +class CumulioClient: + # Cumul.io will auto-generate a UUID that is unique to the dataset created. + # To ensure a consistent flow to the same dataset, we'll add a tag to the dataset: + # the tag is a combination of the prefix below and the stream name. + # This allows us to retrieve the same dataset resource upon further sync schedules. + TAG_PREFIX = "[AIRBYTE - DO NOT DELETE] - " + + REPLACE_TAG = "REPLACE DATA" + + INITIAL_DATASET_NAME_PREFIX = "Airbyte - " + + BACKOFF_TIMES_IN_SECONDS = [300, 600, 1200] + + def __init__(self, config: Mapping[str, Any], logger: Logger): + self.logger = logger + self.client = Cumulio(config["api_key"], config["api_token"], config["api_host"]) + + def batch_write( + self, + stream_name: str, + write_buffer: list, + column_headers: list, + is_in_overwrite_sync_mode: bool, + is_first_batch: bool, + update_metadata: bool, + ): + """Write a list of data (array of arrays) in a specific sync mode to Cumul.io.""" + if len(write_buffer) == 0 or (len(write_buffer) == 1 and len(write_buffer[0]) == 0): + return + + dataset_id = self._get_dataset_id_from_stream_name(stream_name) + if dataset_id is None: + dataset_id = self._push_batch_to_new_dataset(stream_name, write_buffer, column_headers) + else: + is_in_replace_mode = self._dataset_contains_replace_tag(dataset_id) + first_batch_replace = is_first_batch and (is_in_overwrite_sync_mode or is_in_replace_mode) + self._push_batch_to_existing_dataset( + dataset_id, + write_buffer, + column_headers, + first_batch_replace, + update_metadata, + ) + + self.logger.info(f"Successfully pushed {len(write_buffer)} rows to Cumul.io's data warehouse in a dataset with id {dataset_id}.") + + def test_api_token(self): + """Test an API key and token by retrieving it.""" + self.logger.info("Checking API host, key and token.") + data = self.client.get("authorization", {"where": {"type": "api"}}) + # if response contains a count 0, the API host, key and token combination is unknown to Cumul.io. + if data["count"] == 0: + raise Exception( + "Unknown combination of API host, key and token. Can you verify whether you've specified the correct combination of " + "Cumul.io API host, key, and token?" 
+ ) + self.logger.info("API host, key and token combination is valid.") + + def test_data_push(self, stream_name: str, data: list[list[Any]], columns: list[str]): + """[DEPRECATED] This method is no longer in use as it results in a lot of overhead. + Test pushing dummy data into a dataset, and delete the dataset afterwards.""" + + self.logger.info("Starting data push of dummy data.") + self.batch_write(stream_name, data, columns, True, True, True) + self.logger.info("Finished data push of dummy data. Will delete dummy dataset.") + + self.delete_dataset(stream_name) + self.logger.info("Finished deleting dummy dataset.") + + def delete_dataset(self, stream_name: str): + """Delete a dataset in Cumul.io. + This should only be used for testing purposes. Currently used in: + - Integration tests + - When pushing dummy data to an example dataset during "check" of Airbyte destination connector (see destination.py check method) + """ + dataset_id = self._get_dataset_id_from_stream_name(stream_name) + if dataset_id is not None: + return self.client.delete("securable", dataset_id) + + self.logger.info(f"No dataset for stream {stream_name} found to delete.") + + def get_ordered_columns(self, stream_name: str): + """Return a list of ordered columns (based on their order in Cumul.io). + The dataset is retrieved based on a Cumul.io tag that includes the stream_name. + """ + dataset_and_columns = self.get_dataset_and_columns_from_stream_name(stream_name) + if dataset_and_columns is None: + # Dataset hasn't been created yet on Cumul.io's side. + return [] + # Sort columns based on the order property. + order_sorted_columns = sorted(dataset_and_columns["columns"], key=lambda x: x["order"]) + # Return a list of column source names. + return [column["source_name"] for column in order_sorted_columns] + + def get_dataset_and_columns_from_stream_name(self, stream_name: str): + """Return a dataset and its columns based on a Cumul.io tag that includes the stream_name.""" + result = self.client.get( + "securable", + { + "where": {"type": "dataset"}, + "attributes": ["id", "name"], + "include": [ + { + "model": "Tag", + "where": {"tag": self.TAG_PREFIX + stream_name}, + "attributes": ["id", "tag"], + "jointype": "inner", + }, + { + "model": "Column", + "attributes": ["id", "source_name", "order"], + "jointype": "inner", + }, + ], + }, + ) + if result["count"] > 1: + raise Exception( + f"More than one dataset has been returned, could you verify whether the tag for stream {stream_name} is set up " + f"correctly in Cumul.io (expected a tag '{self.TAG_PREFIX}{stream_name}')?" + ) + # A count of zero means that the dataset has not been created on Cumul.io's side yet. + # We'll return None to indicate this. + elif result["count"] == 0: + return None + # return dataset and its columns. + return result["rows"][0] + + def set_replace_tag_on_dataset(self, stream_name: str): + """Add a "replace" tag to a specific dataset based on the stream_name. + The "replace" tag is used to ensure that the next sync will replace the existing data. + """ + dataset_id = self._get_dataset_id_from_stream_name(stream_name) + if dataset_id is not None: + self.logger.info( + f"A tag will be added to the dataset with id {dataset_id} to replace the existing data upon next sync. " + f"As a result, the existing data will not be replaced until the next sync has ran. " + f"This avoids empty datasets which cause 'No data' to be displayed upon querying them." 
+ ) + return self._associate_tag_dataset_id(self.REPLACE_TAG, dataset_id) + self.logger.debug( + f"No dataset found to set Replace tag on (looking for stream name '{stream_name}'), " + f"this might be due to the dataset not existing yet on Cumul.io's side." + ) + + def _push_batch_to_new_dataset(self, stream_name: str, write_buffer: list[list[Any]], column_headers: list[str]): + properties = { + "type": "create", + "data": write_buffer, + "options": { + "header": column_headers, + "update_metadata": True, + "name": {"en": self.INITIAL_DATASET_NAME_PREFIX + stream_name}, + }, + } + result: Mapping[str, Any] = {} + data_is_pushed = False + try_count = 0 + while (not data_is_pushed) and try_count < len(self.BACKOFF_TIMES_IN_SECONDS): + try: + self.logger.info( + f"Pushing {len(write_buffer)} rows to Cumul.io's data warehouse in a new Cumul.io dataset " + f"with name {self.INITIAL_DATASET_NAME_PREFIX}{stream_name}." + ) + + result = self.client.create("data", properties) + data_is_pushed = True + + except Exception as e: + if "Unauthorized" in str(e): + raise Exception( + f"Not able to push a batch of data to a new dataset due to an 'Unauthorized' error. " + f"Please verify that your API key and token are still valid!" + f"Error: {e}" + ) + elif try_count + 1 >= len(self.BACKOFF_TIMES_IN_SECONDS): + raise Exception(f"Exception while creating new dataset after {len(self.BACKOFF_TIMES_IN_SECONDS)} retries: {e}") + + seconds_to_backoff = self.BACKOFF_TIMES_IN_SECONDS[try_count] + try_count += 1 + self.logger.info( + f"Error pushing data to a new dataset during try {try_count}, retrying in {seconds_to_backoff} seconds. Error: {e}" + ) + time.sleep(seconds_to_backoff) + + dataset_id = result["rows"][0]["id"] + try: + # Add a tag to the dataset to allow retrieving it upon further syncs / batch writes + self._associate_tag_dataset_id(stream_name, dataset_id) + except Exception as e: + raise Exception( + f"The data has been stored successfully, but an error occurred while associating a required tag to the " + f"dataset (id: {dataset_id}). This will likely cause issues upon further synchronizations. The following " + f"error occurred: ", + e, + ) + + return dataset_id + + def _push_batch_to_existing_dataset( + self, + dataset_id: str, + write_buffer: list[list[Any]], + column_headers: list[str], + first_batch_replace: bool, + update_metadata: bool, + ): + cumulio_sync_type = "replace" if first_batch_replace else "append" + + properties = { + "type": cumulio_sync_type, + "data": write_buffer, + "securable_id": dataset_id, + "options": { + "header": column_headers, + "update_metadata": update_metadata, + }, + } + data_is_pushed = False + try_count = 0 + while (not data_is_pushed) and try_count < len(self.BACKOFF_TIMES_IN_SECONDS): + try: + self.logger.info( + f"Pushing {len(write_buffer)} rows to Cumul.io dataset with id {dataset_id} in {cumulio_sync_type} mode, " + f"{'while' if update_metadata else 'not'} updating the columns of that dataset." + ) + self.client.create("data", properties) + + data_is_pushed = True + + if first_batch_replace: + # Try to remove replace tag to ensure next syncs do not replace existing data. + self._remove_replace_tag_dataset_id_association(dataset_id) + + except RuntimeError as e: + if "Unauthorized" in str(e): + raise Exception( + f"Not able to push a batch of data to dataset {dataset_id} due to an 'Unauthorized' error. " + f"Please verify that your API key and token are still valid!" 
+ f"Error: {e}" + ) + elif try_count + 1 >= len(self.BACKOFF_TIMES_IN_SECONDS): + raise Exception( + f"Exception while pushing to existing dataset {dataset_id} after {len(self.BACKOFF_TIMES_IN_SECONDS)} retries: ", + e, + ) + + seconds_to_backoff = self.BACKOFF_TIMES_IN_SECONDS[try_count] + try_count += 1 + + self.logger.info( + f"Error pushing data to existing dataset {dataset_id} during try {try_count}, retrying in {seconds_to_backoff} seconds." + ) + + time.sleep(seconds_to_backoff) + + def _dataset_contains_replace_tag(self, dataset_id: str): + """Return a boolean to indicate whether a dataset contains the "replace" tag.""" + result = self.client.get( + "securable", + { + "where": {"type": "dataset", "id": dataset_id}, + "attributes": ["id", "name"], + "include": [ + { + "model": "Tag", + "where": {"tag": self.TAG_PREFIX + self.REPLACE_TAG}, + "attributes": ["id", "tag"], + "jointype": "inner", + } + ], + }, + ) + return False if result["count"] == 0 else True + + def _remove_replace_tag_dataset_id_association(self, dataset_id: str): + """Remove the "replace" tag from a specific dataset.""" + tag_id = self._get_tag_id(self.REPLACE_TAG) + if tag_id is not None: + return self._dissociate_tag_with_dataset_id(tag_id, dataset_id) + self.logger.debug( + f"No replace tag found, so could not remove for Cumul.io dataset with id {dataset_id}." + f"This could be expected as the stream might be configured in overwrite mode." + ) + + def _get_dataset_id_from_stream_name(self, stream_name: str): + """Return a dataset ID based on a Cumul.io tag that includes the stream_name.""" + result = self.client.get( + "securable", + { + "where": {"type": "dataset"}, + "attributes": ["id", "name"], + "include": [ + { + "model": "Tag", + "where": {"tag": self.TAG_PREFIX + stream_name}, + "attributes": ["id", "tag"], + "jointype": "inner", + } + ], + }, + ) + if result["count"] > 1: + raise Exception( + f"More than one dataset has been found, could you verify whether the tag for stream {stream_name} is set up " + f"correctly in Cumul.io (expected a tag '{self.TAG_PREFIX}{stream_name}' on a single dataset)?" + ) + # A count of zero means that the dataset has not been created on Cumul.io's side yet. + # We'll return None to indicate this. + elif result["count"] == 0: + return None + # return dataset ID + return result["rows"][0]["id"] + + def _associate_tag_dataset_id(self, tag_name: str, dataset_id: str): + """Ensure that a specific stream name tag is associated to a dataset ID. + Optionally the Tag is created and associated if not existing yet. + """ + # A tag should be unique and cannot be created multiple times. + # In order to ensure that the association doesn't fail, + # we'll first try to retrieve the tag and then either + # associate it with the newly created securable, + # or create & associate it. 
+ tag_id = self._get_tag_id(tag_name) + if tag_id is not None: + return self._associate_tag_with_dataset_id(tag_id, dataset_id) + return self._create_and_associate_stream_name_tag_with_dataset_id(tag_name, dataset_id) + + def _get_tag_id(self, tag_name: str): + """Return a Tag ID using the stream name.""" + result = self.client.get("tag", {"where": {"tag": self.TAG_PREFIX + tag_name}}) + if result["count"] == 0: + return None + return result["rows"][0]["id"] + + def _associate_tag_with_dataset_id(self, tag_id: str, dataset_id: str): + return self.client.associate("tag", tag_id, "Securables", dataset_id) + + def _dissociate_tag_with_dataset_id(self, tag_id: str, dataset_id: str): + return self.client.dissociate("tag", tag_id, "Securables", dataset_id) + + def _create_and_associate_stream_name_tag_with_dataset_id(self, tag_name: str, dataset_id: str): + return self.client.create( + "tag", + {"tag": self.TAG_PREFIX + tag_name}, + [{"role": "Securables", "id": dataset_id}], + ) diff --git a/airbyte-integrations/connectors/destination-cumulio/destination_cumulio/destination.py b/airbyte-integrations/connectors/destination-cumulio/destination_cumulio/destination.py new file mode 100644 index 0000000..61c6c5a --- /dev/null +++ b/airbyte-integrations/connectors/destination-cumulio/destination_cumulio/destination.py @@ -0,0 +1,101 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +from logging import Logger, getLogger +from typing import Any, Iterable, Mapping + +from airbyte_cdk.destinations import Destination +from airbyte_cdk.models import AirbyteConnectionStatus, AirbyteMessage, ConfiguredAirbyteCatalog, DestinationSyncMode, Status, Type +from destination_cumulio.client import CumulioClient +from destination_cumulio.writer import CumulioWriter + +logger = getLogger("airbyte") + + +class DestinationCumulio(Destination): + def write( + self, + config: Mapping[str, Any], + configured_catalog: ConfiguredAirbyteCatalog, + input_messages: Iterable[AirbyteMessage], + ) -> Iterable[AirbyteMessage]: + """Reads the input stream of messages, config, and catalog to write data to the destination. + + This method returns an iterable (typically a generator of AirbyteMessages via yield) containing state messages received in the + input message stream. Outputting a state message means that every AirbyteRecordMessage which came before it has been successfully + persisted to the destination. This is used to ensure fault tolerance in the case that a sync fails before fully completing, + then the source is given the last state message output from this method as the starting point of the next sync. + + :param config: dict of JSON configuration matching the configuration declared in spec.json. Current format: + { + 'api_host': '', + 'api_key': '', + 'api_token': '' + } + :param configured_catalog: schema of the data being received and how it should be persisted in the destination. + :param input_messages: stream of input messages received from the source. + + :return: Iterable of AirbyteStateMessages wrapped in AirbyteMessage structs. + """ + writer = CumulioWriter(config, configured_catalog, logger) + + for configured_stream in configured_catalog.streams: + # Cumul.io does not support removing all data from an existing dataset, and removing the dataset itself will break existing + # dashboards built on top of it. + # Instead, the connector will make sure to push the first batch of data as a "replace" action: this will cause all existing data + # to be replaced with the first batch of data. 
All next batches will be pushed as an "append" action. + if configured_stream.destination_sync_mode == DestinationSyncMode.overwrite: + writer.delete_stream_entries(configured_stream.stream.name) + + for message in input_messages: + if message.type == Type.STATE: + # Yielding a state message indicates that all records which came before it have been written to the destination. + # We flush all write buffers in the writer, and then output the state message itself. + writer.flush_all() + yield message + elif message.type == Type.RECORD: + record = message.record + assert record is not None + assert record.stream is not None + assert record.data is not None + writer.queue_write_operation(record.stream, record.data) + else: + # ignore other message types for now + continue + + # Make sure to flush any records still in the queue + writer.flush_all() + + def check(self, logger: Logger, config: Mapping[str, Any]) -> AirbyteConnectionStatus: + """Tests if the input configuration can be used to successfully connect to the destination with the needed permissions. + + This will test whether the combination of the Cumul.io API host, API key and API token is valid. + + :param logger: Logging object to display debug/info/error to the logs + (logs will not be accessible via airbyte UI if they are not passed to this logger) + :param config: Json object containing the configuration of this destination, content of this json is as specified in + the properties of the spec.json file + + :return: AirbyteConnectionStatus indicating a Success or Failure + """ + try: + client = CumulioClient(config, logger) + # Verify access by hitting Cumul.io authentication endpoint + client.test_api_token() + + # We're no longer using testing a data push as this might take some time. + # If the API host, key, and token are valid, we can assume Data can be pushed using it. + + return AirbyteConnectionStatus(status=Status.SUCCEEDED) + except Exception as e: + # The Cumul.io Python SDK currently returns a generic error message when an issue occurs during the request, + # or when the request return e.g. a 401 Unauthorized HTTP response code. + # We'll assume that either the API host is incorrect, or the API key and token are no longer valid. + if not e == "Something went wrong": + return AirbyteConnectionStatus(status=Status.FAILED, message=f"An exception occurred: {repr(e)}") + return AirbyteConnectionStatus( + status=Status.FAILED, + message="An exception occurred: could it be that the API host is incorrect, or the API key and token are no longer valid?", + ) diff --git a/airbyte-integrations/connectors/destination-cumulio/destination_cumulio/spec.json b/airbyte-integrations/connectors/destination-cumulio/destination_cumulio/spec.json new file mode 100644 index 0000000..dff9ec3 --- /dev/null +++ b/airbyte-integrations/connectors/destination-cumulio/destination_cumulio/spec.json @@ -0,0 +1,37 @@ +{ + "documentationUrl": "https://docs.airbyte.com/integrations/destinations/cumulio", + "supported_destination_sync_modes": ["overwrite", "append"], + "supportsIncremental": true, + "supportsDBT": false, + "supportsNormalization": false, + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Destination Cumulio", + "type": "object", + "required": ["api_host", "api_key", "api_token"], + "additionalProperties": true, + "properties": { + "api_host": { + "title": "Cumul.io API Host URL", + "description": "URL of the Cumul.io API (e.g. 
'https://api.cumul.io', 'https://api.us.cumul.io', or VPC-specific API url). Defaults to 'https://api.cumul.io'.", + "default": "https://api.cumul.io", + "type": "string", + "order": 0 + }, + "api_key": { + "title": "Cumul.io API Key", + "description": "An API key generated in Cumul.io's platform (can be generated here: https://app.cumul.io/start/profile/integration).", + "type": "string", + "airbyte_secret": true, + "order": 1 + }, + "api_token": { + "title": "Cumul.io API Token", + "description": "The corresponding API token generated in Cumul.io's platform (can be generated here: https://app.cumul.io/start/profile/integration).", + "type": "string", + "airbyte_secret": true, + "order": 2 + } + } + } +} diff --git a/airbyte-integrations/connectors/destination-cumulio/destination_cumulio/writer.py b/airbyte-integrations/connectors/destination-cumulio/destination_cumulio/writer.py new file mode 100644 index 0000000..93c8d05 --- /dev/null +++ b/airbyte-integrations/connectors/destination-cumulio/destination_cumulio/writer.py @@ -0,0 +1,205 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +import json +from logging import Logger +from typing import Any, Mapping + +from airbyte_cdk.models import ConfiguredAirbyteCatalog, ConfiguredAirbyteStream, DestinationSyncMode +from destination_cumulio.client import CumulioClient + + +def _convert_airbyte_configured_stream_into_headers_dict( + configured_stream: ConfiguredAirbyteStream, +): + """Return a dict of column names and types based on the configured Airbyte stream. + Note that the Airbyte types are currently not used due to Cumul.io's Data API Service not supporting specifying column types. + """ + column_headers = {} + for column_header in configured_stream.stream.json_schema["properties"]: + if "airbyte-type" in configured_stream.stream.json_schema["properties"][column_header]: + column_headers[column_header] = { + "airbyte-type": configured_stream.stream.json_schema["properties"][column_header]["airbyte-type"] + } + else: + column_headers[column_header] = {"airbyte-type": configured_stream.stream.json_schema["properties"][column_header]["type"]} + return column_headers + + +class CumulioWriter: + # Cumul.io's Data API service has a limit of pushing 10 000 data points (i.e. rows) in a single request. + # (see note here: https://developer.cumul.io/?shell#data_create) + FLUSH_INTERVAL = 10000 + + def __init__( + self, + config: Mapping[str, Any], + configured_catalog: ConfiguredAirbyteCatalog, + logger: Logger, + ): + """Create a single Cumul.io Client and a dict of writers. + The Cumul.io Client will be used to send API requests to Cumul.io's API. + The writers dict will contain one element for each configured_stream in the connection. + Each of these dicts have a stream-specific configuration and write buffer. + """ + self.logger = logger + self.client = CumulioClient(config, logger) + self.writers = self._create_writers(configured_catalog) + + def queue_write_operation(self, stream_name: str, data: Mapping): + """Queue data in a specific writer buffer. + It flushes the buffer in case it has reached the flush interval. 
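+ The flush interval is FLUSH_INTERVAL (10 000 rows), matching Cumul.io's Data API limit of 10 000 data points per request.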
+ """ + cumulio_data = self.transform_data(stream_name, data) + self.writers[stream_name]["write_buffer"].append(cumulio_data) + if len(self.writers[stream_name]["write_buffer"]) == self.FLUSH_INTERVAL: + self.flush(stream_name) + + def flush_all(self): + """Flush all writer buffers.""" + for stream_name in self.writers: + self.flush(stream_name) + + def flush(self, stream_name: str): + """Write a batch of data from the write buffer using the Cumul.io client.""" + self.client.batch_write( + stream_name, + self.writers[stream_name]["write_buffer"], + [column_header["name"] for column_header in self.writers[stream_name]["column_headers"]], + self.writers[stream_name]["is_in_overwrite_sync_mode"], + self.writers[stream_name]["is_first_batch"], + self.writers[stream_name]["update_metadata"], + ) + self.writers[stream_name]["write_buffer"].clear() + if self.writers[stream_name]["is_first_batch"]: + self.writers[stream_name]["is_first_batch"] = False + + def transform_data(self, stream_name: str, airbyte_data: Mapping) -> list[Any]: + """Transform Airbyte data (one row) into Cumul.io's expected data format (a list in the appropriate order). + If data for a specific column is not included in the Airbyte data, the value will be None. + If data for a specific column in the Airbyte data is not recognized, it will be ignored as extraneous. + (see here: https://docs.airbyte.com/understanding-airbyte/airbyte-protocol/#output-4) + """ + try: + self.writers[stream_name] + except KeyError: + raise Exception(f"The stream {stream_name} is not defined in the configured_catalog and won't thus be streamed.") + + data: list[Any] = [None for i in range(len(self.writers[stream_name]["column_headers"]))] + for column in airbyte_data: + unknown_data = True + index: int = 0 + for column_header in self.writers[stream_name]["column_headers"]: + if column_header["name"] == column: + unknown_data = False + # Cumul.io doesn't support storing or querying nested (list, dict) or boolean data. + # we'll stringify this data via json.dumps + if ( + isinstance(airbyte_data[column], list) + or isinstance(airbyte_data[column], dict) + or isinstance(airbyte_data[column], bool) + ): + data[index] = json.dumps(airbyte_data[column]) + else: + data[index] = airbyte_data[column] + index += 1 + if unknown_data: + self.logger.debug( + f"The value with name {column} has not been defined in the ConfiguredAirbyteStream and will thus be " + f"ignored as extraneous." + ) + return data + + def delete_stream_entries(self, stream_name: str): + """Set a "replace" tag on a dataset to ensure all existing data will be replaced upon next synchronization.""" + return self.client.set_replace_tag_on_dataset(stream_name) + + def _create_writers(self, configured_catalog: ConfiguredAirbyteCatalog): + """Return a set of writers, one for each stream in the configured_catalog. 
+ This method will also merge the Cumul.io columns for the stream's dataset, if existing.""" + writers = {} + for configured_stream in configured_catalog.streams: + result = self._merge_cumulio_and_airbyte_column_headers(configured_stream) + writers[configured_stream.stream.name] = { + "write_buffer": [], + "column_headers": result["sorted_column_headers"], + "is_in_overwrite_sync_mode": configured_stream.destination_sync_mode == DestinationSyncMode.overwrite, + "is_first_batch": True, + "update_metadata": result["update_metadata"], + } + return writers + + def _merge_cumulio_and_airbyte_column_headers(self, configured_stream: ConfiguredAirbyteStream): + """Merge columns known by Airbyte and Cumul.io. + - If the dataset does not yet exist in Cumul.io (i.e. the first sync), the columns order will be based on "for el in dict" order. + - Upon next synchronizations, the dataset exists in Cumul.io. Its column order will be used to send data in the corresponding order. + - If a new column is added to the source table (i.e. this column doesn't exist yet in Cumul.io), + it will be added at the end of the dataset's columns upon next synchronization. + - If an existing column is removed from the source: + 1. If the next synchronization for this stream runs in "overwrite" mode (or a "replace" tag is set), the Cumul.io dataset will + no longer contain the original column. + 2. If the next synchronization for this stream runs in "append" mode, the Cumul.io dataset will + contain empty values for the non-existing columns for all appended rows. + Note that Airbyte recommends a reset upon changes to source schema(s). In that case, the first batch will be synced + using the "overwrite" mode (due to setting a reset tag on the dataset, see delete_stream_entries implementation). + """ + cumulio_column_headers = self.client.get_ordered_columns(configured_stream.stream.name) + airbyte_column_headers = _convert_airbyte_configured_stream_into_headers_dict(configured_stream) + + update_metadata = False + + merged_column_headers = [] + new_column_count = 0 + for airbyte_column_header in airbyte_column_headers: + merged_column_header = { + "name": airbyte_column_header, + "airbyte-type": airbyte_column_headers[airbyte_column_header]["airbyte-type"], + } + + try: + # Add an order based on the order of the column in the Cumul.io dataset + merged_column_header["order"] = cumulio_column_headers.index(airbyte_column_header) + except ValueError: + # Add an appropriate order to ensure the column appears at the end of the data + new_column_count += 1 + merged_column_header["order"] = len(cumulio_column_headers) + new_column_count + + merged_column_headers.append(merged_column_header) + + sorted_column_headers = sorted(merged_column_headers, key=lambda x: x["order"]) + if new_column_count > 0: + update_metadata = True + + if len(cumulio_column_headers) > 0: + self.logger.info( + f"One or more columns defined in stream {configured_stream.stream.name} are not yet present in Cumul.io, " + f"and will added upon next successful synchronization." + ) + else: + self.logger.info( + f"The dataset for stream {configured_stream.stream.name} doesn't seem to exist in Cumul.io. " + f"The next sync for this stream will create it." + ) + elif not update_metadata: + # Validate whether all columns in Cumul.io are still part of the configured airbyte catalog definition. 
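+ # This check only runs when no new source columns were detected: if a column that exists in Cumul.io is no longer
+ # part of the configured stream, update_metadata is flagged so the dataset's column metadata is refreshed on the next push.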
+ for cumulio_column_header in cumulio_column_headers: + try: + # Try to find the Cumul.io column header in the Airbyte columns + airbyte_column_headers[cumulio_column_header] + except KeyError: + # Cumul.io's column hasn't been found, so we'll need to update the dataset's metadata upon next sync. + if configured_stream.destination_sync_mode == DestinationSyncMode.overwrite: + self.logger.info( + f"The source column {cumulio_column_header} in Cumul.io is no longer present in the configured " + f"stream {configured_stream.stream.name} (i.e. in the source). As the stream synchronization is " + f"in overwrite mode, the existing column in Cumul.io will be deleted upon next sync. Check " + f"carefully whether this column is used in any existing Cumul.io dashboards!" + ) + update_metadata = True + + return { + "sorted_column_headers": sorted_column_headers, + "update_metadata": update_metadata, + } diff --git a/airbyte-integrations/connectors/destination-cumulio/icon.svg b/airbyte-integrations/connectors/destination-cumulio/icon.svg new file mode 100644 index 0000000..85cf3ee --- /dev/null +++ b/airbyte-integrations/connectors/destination-cumulio/icon.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/airbyte-integrations/connectors/destination-cumulio/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/destination-cumulio/integration_tests/configured_catalog.json new file mode 100644 index 0000000..844c37f --- /dev/null +++ b/airbyte-integrations/connectors/destination-cumulio/integration_tests/configured_catalog.json @@ -0,0 +1,29 @@ +{ + "streams": [ + { + "stream": { + "name": "cumulio_example_table", + "supported_sync_modes": ["full_refresh"], + "source_defined_cursor": false, + "json_schema": { + "type": "object", + "properties": { + "hierarchy_column": { + "type": "string" + }, + "numeric_column": { + "type": "number" + }, + "datetime_column": { + "type": "string", + "format": "date-time", + "airbyte_type": "timestamp_with_timezone" + } + } + } + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + } + ] +} diff --git a/airbyte-integrations/connectors/destination-cumulio/integration_tests/integration_test.py b/airbyte-integrations/connectors/destination-cumulio/integration_tests/integration_test.py new file mode 100644 index 0000000..545241d --- /dev/null +++ b/airbyte-integrations/connectors/destination-cumulio/integration_tests/integration_test.py @@ -0,0 +1,276 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+# + +import json +import time +from logging import Logger, getLogger +from typing import Any, Dict, Mapping + +import pytest +from airbyte_cdk.models import ( + AirbyteMessage, + AirbyteRecordMessage, + AirbyteStateMessage, + AirbyteStream, + ConfiguredAirbyteCatalog, + ConfiguredAirbyteStream, + DestinationSyncMode, + Status, + SyncMode, + Type, +) +from destination_cumulio import DestinationCumulio +from destination_cumulio.client import CumulioClient + + +@pytest.fixture(name="logger") +def logger_fixture() -> Logger: + return getLogger("airbyte") + + +@pytest.fixture(name="config") +def config_fixture() -> Mapping[str, Any]: + with open("secrets/config.json", "r") as f: + return json.loads(f.read()) + + +@pytest.fixture(name="configured_catalog") +def configured_catalog_fixture() -> ConfiguredAirbyteCatalog: + stream_schema = { + "type": "object", + "properties": { + "string_col": {"type": "str"}, + "int_col": {"type": "integer"}, + "obj_col": {"type": "object"}, + "arr_col": {"type": "array"}, + }, + } + + append_stream = ConfiguredAirbyteStream( + stream=AirbyteStream( + name="append_integration_test_stream", + json_schema=stream_schema, + supported_sync_modes=[SyncMode.incremental], + ), + sync_mode=SyncMode.incremental, + destination_sync_mode=DestinationSyncMode.append, + ) + + overwrite_stream = ConfiguredAirbyteStream( + stream=AirbyteStream( + name="overwrite_integration_test_stream", + json_schema=stream_schema, + supported_sync_modes=[SyncMode.incremental], + ), + sync_mode=SyncMode.incremental, + destination_sync_mode=DestinationSyncMode.overwrite, + ) + + return ConfiguredAirbyteCatalog(streams=[append_stream, overwrite_stream]) + + +@pytest.fixture(autouse=True) +def delete_datasets(config: Mapping, configured_catalog: ConfiguredAirbyteCatalog, logger: Logger): + cumulio_client = CumulioClient(config, logger) + for stream in configured_catalog.streams: + dataset = cumulio_client.get_dataset_and_columns_from_stream_name(stream.stream.name) + if dataset: + logger.info( + f"Existing integration test dataset found. Will delete Cumul.io dataset for integration test stream {stream.stream.name}." 
+ ) + try: + cumulio_client.client.delete("securable", dataset["id"]) + except Exception as e: + logger.info( + f"The following exception occurred when trying to delete the dataset " + f"for integration test stream {stream.stream.name}: {e}" + ) + + +def test_check_valid_config(config: Mapping, logger: Logger): + outcome = DestinationCumulio().check(logger, config) + assert outcome.status == Status.SUCCEEDED + + +def test_check_incomplete_config(logger: Logger): + outcome = DestinationCumulio().check(logger, {"api_host": "https://api.cumul.io"}) + assert outcome.status == Status.FAILED + + +def test_check_invalid_config(logger: Logger): + outcome = DestinationCumulio().check( + logger, + { + "api_host": ".invalid.url", + "api_key": "invalid_key", + "api_token": "invalid_token", + }, + ) + assert outcome.status == Status.FAILED + + +def _state(data: Dict[str, Any]) -> AirbyteMessage: + return AirbyteMessage(type=Type.STATE, state=AirbyteStateMessage(data=data)) + + +def _record(stream_name: str, str_value: str, int_value: int, obj_value: dict, arr_value: list) -> AirbyteMessage: + return AirbyteMessage( + type=Type.RECORD, + record=AirbyteRecordMessage( + stream=stream_name, + data={ + "string_col": str_value, + "int_col": int_value, + "obj_col": obj_value, + "arr_col": arr_value, + }, + emitted_at=0, + ), + ) + + +def _retrieve_all_records(cumulio_client, stream_name): + dataset_and_columns = cumulio_client.get_dataset_and_columns_from_stream_name(stream_name) + # Wait 5 seconds before trying to retrieve the data to ensure it can be properly retrieved + time.sleep(5) + if dataset_and_columns is not None: + ordered_columns = cumulio_client.get_ordered_columns(stream_name) + dimension_columns = list( + map( + lambda x, y: { + "dataset_id": dataset_and_columns["id"], + "column_id": y["id"], + }, + ordered_columns, + dataset_and_columns["columns"], + ) + ) + int_col_ind = ordered_columns.index("int_col") + + raw_data_query = { + "dimensions": dimension_columns, + "options": {"rollup_data": False}, + "order": [ + { + "dataset_id": dataset_and_columns["id"], + "column_id": dataset_and_columns["columns"][int_col_ind]["id"], + "order": "asc", + } + ], + } + raw_data = cumulio_client.client.get("data", raw_data_query) + airbyte_data_to_return = [] + for row in raw_data["data"]: + airbyte_data_row = {} + for col_ind, column in enumerate(dataset_and_columns["columns"]): + if isinstance(row[col_ind], dict): + airbyte_data_row[column["source_name"]] = row[col_ind]["id"] + else: + airbyte_data_row[column["source_name"]] = row[col_ind] + airbyte_data_to_return.append( + AirbyteMessage( + type=Type.RECORD, + record=AirbyteRecordMessage(stream=stream_name, data=airbyte_data_row, emitted_at=0), + ) + ) + return airbyte_data_to_return + return None + + +def test_write_append( + config: Mapping, + configured_catalog: ConfiguredAirbyteCatalog, + logger: Logger, +): + """ + This test verifies that: + - Writing a stream in "append" mode appends new records while preserving existing data. + - The correct state message is output by the connector at the end of the sync. + - Object and Array data is appropriately stringified in Cumul.io. 
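+ The test pushes two chunks of two records each and expects all four rows back, ordered ascending on int_col.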
+ """ + stream_name = configured_catalog.streams[0].stream.name + destination = DestinationCumulio() + + state_message = _state({"state": "3"}) + record_chunk_1 = [_record(stream_name, "test-" + str(i), i, {"test": i}, ["test", i]) for i in range(1, 3)] + + output_states_1 = list(destination.write(config, configured_catalog, [*record_chunk_1, state_message])) + assert [state_message] == output_states_1 + + record_chunk_2 = [_record(stream_name, "test-" + str(i), i, {"test": i}, ["test", i]) for i in range(3, 5)] + + output_states_2 = list(destination.write(config, configured_catalog, [*record_chunk_2, state_message])) + assert [state_message] == output_states_2 + + cumulio_client = CumulioClient(config, logger) + + records_in_destination = _retrieve_all_records(cumulio_client, stream_name) + + expected_records = [ + AirbyteMessage( + type=Type.RECORD, + record=AirbyteRecordMessage( + stream=stream_name, + data={ + "string_col": "test-" + str(i), + "int_col": i, + "obj_col": json.dumps({"test": i}), + "arr_col": json.dumps(["test", i]), + }, + emitted_at=0, + ), + ) + for i in range(1, 5) + ] + + assert expected_records == records_in_destination + + +def test_write_overwrite( + config: Mapping[str, Any], + configured_catalog: ConfiguredAirbyteCatalog, + logger: Logger, +): + """ + This test verifies that: + - writing a stream in "append" mode overwrite all exiting data. + - the correct state message is output by the connector at the end of the sync. + - Object and Array data is appropriately stringified in Cumul.io. + """ + stream_name = configured_catalog.streams[1].stream.name + destination = DestinationCumulio() + + state_message = _state({"state": "3"}) + record_chunk_1 = [_record(stream_name, "oldtest-" + str(i), i, {"oldtest": i}, ["oldtest", i]) for i in range(1, 3)] + + output_states_1 = list(destination.write(config, configured_catalog, [*record_chunk_1, state_message])) + assert [state_message] == output_states_1 + + record_chunk_2 = [_record(stream_name, "newtest-" + str(i), i, {"newtest": i}, ["newtest", i]) for i in range(1, 3)] + + output_states_2 = list(destination.write(config, configured_catalog, [*record_chunk_2, state_message])) + assert [state_message] == output_states_2 + + cumulio_client = CumulioClient(config, logger) + + records_in_destination = _retrieve_all_records(cumulio_client, stream_name) + + expected_records = [ + AirbyteMessage( + type=Type.RECORD, + record=AirbyteRecordMessage( + stream=stream_name, + data={ + "string_col": "newtest-" + str(i), + "int_col": i, + "obj_col": json.dumps({"newtest": i}), + "arr_col": json.dumps(["newtest", i]), + }, + emitted_at=0, + ), + ) + for i in range(1, 3) + ] + + assert expected_records == records_in_destination diff --git a/airbyte-integrations/connectors/destination-cumulio/integration_tests/sample_config.json b/airbyte-integrations/connectors/destination-cumulio/integration_tests/sample_config.json new file mode 100644 index 0000000..2a1ca74 --- /dev/null +++ b/airbyte-integrations/connectors/destination-cumulio/integration_tests/sample_config.json @@ -0,0 +1,5 @@ +{ + "api_host": "https://api.cumul.io", + "api_key": "CUMULIO_API_KEY", + "api_token": "CUMULIO_API_TOKEN" +} diff --git a/airbyte-integrations/connectors/destination-cumulio/main.py b/airbyte-integrations/connectors/destination-cumulio/main.py new file mode 100644 index 0000000..3ad0d71 --- /dev/null +++ b/airbyte-integrations/connectors/destination-cumulio/main.py @@ -0,0 +1,11 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+# + + +import sys + +from destination_cumulio import DestinationCumulio + +if __name__ == "__main__": + DestinationCumulio().run(sys.argv[1:]) diff --git a/airbyte-integrations/connectors/destination-cumulio/metadata.yaml b/airbyte-integrations/connectors/destination-cumulio/metadata.yaml new file mode 100644 index 0000000..bef0bae --- /dev/null +++ b/airbyte-integrations/connectors/destination-cumulio/metadata.yaml @@ -0,0 +1,24 @@ +data: + connectorType: destination + definitionId: e088acb6-9780-4568-880c-54c2dd7f431b + dockerImageTag: 0.1.0 + dockerRepository: airbyte/destination-cumulio + githubIssueLabel: destination-cumulio + connectorSubtype: api + icon: cumulio.svg + license: MIT + name: Cumul.io + registries: + cloud: + enabled: false + oss: + enabled: false + releaseStage: alpha + documentationUrl: https://docs.airbyte.com/integrations/destinations/cumulio + tags: + - language:python + ab_internal: + sl: 100 + ql: 100 + supportLevel: archived +metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/destination-cumulio/requirements.txt b/airbyte-integrations/connectors/destination-cumulio/requirements.txt new file mode 100644 index 0000000..d6e1198 --- /dev/null +++ b/airbyte-integrations/connectors/destination-cumulio/requirements.txt @@ -0,0 +1 @@ +-e . diff --git a/airbyte-integrations/connectors/destination-cumulio/setup.py b/airbyte-integrations/connectors/destination-cumulio/setup.py new file mode 100644 index 0000000..e613da7 --- /dev/null +++ b/airbyte-integrations/connectors/destination-cumulio/setup.py @@ -0,0 +1,23 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +from setuptools import find_packages, setup + +MAIN_REQUIREMENTS = ["airbyte-cdk", "cumulio"] + +TEST_REQUIREMENTS = ["pytest~=6.2"] + +setup( + name="destination_cumulio", + description="Airbyte destination connector implementation for Cumul.io.", + author="Cumul.io", + author_email="support@cumul.io", + packages=find_packages(), + install_requires=MAIN_REQUIREMENTS, + package_data={"": ["*.json"]}, + extras_require={ + "tests": TEST_REQUIREMENTS, + }, +) diff --git a/airbyte-integrations/connectors/destination-cumulio/unit_tests/test_client.py b/airbyte-integrations/connectors/destination-cumulio/unit_tests/test_client.py new file mode 100644 index 0000000..258e8ff --- /dev/null +++ b/airbyte-integrations/connectors/destination-cumulio/unit_tests/test_client.py @@ -0,0 +1,629 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+# + + +from typing import Any, Mapping +from unittest.mock import ANY, MagicMock, patch + +import pytest +from destination_cumulio.client import CumulioClient + +# "# type: ignore" was added in several places to avoid mypy complaining about patching functions with MagicMock + + +@pytest.fixture(name="logger") +def logger_fixture() -> MagicMock: + return MagicMock() + + +@pytest.fixture(name="cumulio_client") +def cumulio_client_fixture(logger: MagicMock) -> CumulioClient: + # Create a mock configuration dictionary + config = { + "api_key": "123456", + "api_token": "abcdef", + "api_host": "https://api.cumul.io", + } + # Initialize a CumulioClient object with the mock configuration for the Cumulio class + with patch("destination_cumulio.client.Cumulio", MagicMock()): + return CumulioClient(config, logger) + + +@pytest.fixture(name="dummy_data") +def dummy_data_fixture() -> Mapping[str, Any]: + return { + "data": [ + [ + "Text value 1", + 1, + "2022-01-01T00:00:00.000Z", + ], + ["Text value 2", 2, "2022-02-01T00:00:00.000Z"], + ["Text value 3", 3, "2022-03-01T00:00:00.000Z"], + ], + "columns": ["Text column", "Numeric column", "Datetime column"], + } + + +# tests for batch_write method + + +def test_batch_write_append_empty_write_buffer(cumulio_client: CumulioClient): + cumulio_client._get_dataset_id_from_stream_name = MagicMock(return_value="dataset_id") # type: ignore + cumulio_client._push_batch_to_new_dataset = MagicMock() # type: ignore + cumulio_client._push_batch_to_existing_dataset = MagicMock() # type: ignore + + cumulio_client.batch_write( + stream_name="test-stream", + write_buffer=[], + column_headers=["test-column"], + is_in_overwrite_sync_mode=False, + is_first_batch=True, + update_metadata=True, + ) + + cumulio_client._get_dataset_id_from_stream_name.assert_not_called() + cumulio_client._push_batch_to_new_dataset.assert_not_called() + cumulio_client._push_batch_to_existing_dataset.assert_not_called() + + cumulio_client.batch_write( + stream_name="test-stream", + write_buffer=[[]], + column_headers=["test-column"], + is_in_overwrite_sync_mode=False, + is_first_batch=True, + update_metadata=True, + ) + + cumulio_client._get_dataset_id_from_stream_name.assert_not_called() + cumulio_client._push_batch_to_new_dataset.assert_not_called() + cumulio_client._push_batch_to_existing_dataset.assert_not_called() + + +def test_batch_write_append_no_existing_dataset(cumulio_client: CumulioClient, dummy_data: Mapping[str, Any]): + cumulio_client.client.get = MagicMock(return_value={"count": 0, "Rows": []}) + cumulio_client._push_batch_to_new_dataset = MagicMock() # type: ignore + cumulio_client._push_batch_to_existing_dataset = MagicMock() # type: ignore + + stream_name = "test-stream" + + cumulio_client.batch_write( + stream_name=stream_name, + write_buffer=dummy_data["data"], + column_headers=dummy_data["columns"], + is_in_overwrite_sync_mode=False, + is_first_batch=True, + update_metadata=True, + ) + + expected_properties = { + "where": {"type": "dataset"}, + "attributes": ["id", "name"], + "include": [ + { + "model": "Tag", + "where": {"tag": cumulio_client.TAG_PREFIX + stream_name}, + "attributes": ["id", "tag"], + "jointype": "inner", + } + ], + } + + cumulio_client.client.get.assert_called_once_with("securable", expected_properties) + + cumulio_client._push_batch_to_existing_dataset.assert_not_called() + + cumulio_client._push_batch_to_new_dataset.assert_called_once_with(stream_name, dummy_data["data"], dummy_data["columns"]) + + +def 
test_batch_write_existing_dataset_no_first_batch_replace(cumulio_client: CumulioClient, dummy_data: Mapping[str, Any]): + cumulio_client._get_dataset_id_from_stream_name = MagicMock(return_value="dataset_id") # type: ignore + cumulio_client._push_batch_to_new_dataset = MagicMock() # type: ignore + cumulio_client._push_batch_to_existing_dataset = MagicMock() # type: ignore + cumulio_client._dataset_contains_replace_tag = MagicMock(return_value=False) # type: ignore + + stream_name = "test-stream" + + cumulio_client.batch_write( + stream_name=stream_name, + write_buffer=dummy_data["data"], + column_headers=dummy_data["columns"], + is_in_overwrite_sync_mode=False, + is_first_batch=True, + update_metadata=True, + ) + cumulio_client._push_batch_to_new_dataset.assert_not_called() + cumulio_client._dataset_contains_replace_tag.assert_called_once_with("dataset_id") + cumulio_client._push_batch_to_existing_dataset.assert_called_once_with( + "dataset_id", dummy_data["data"], dummy_data["columns"], False, True + ) + + +def test_batch_write_existing_dataset_first_batch_replace_overwrite_mode(cumulio_client: CumulioClient, dummy_data: Mapping[str, Any]): + cumulio_client._get_dataset_id_from_stream_name = MagicMock(return_value="dataset_id") # type: ignore + cumulio_client._push_batch_to_new_dataset = MagicMock() # type: ignore + cumulio_client._push_batch_to_existing_dataset = MagicMock() # type: ignore + cumulio_client._dataset_contains_replace_tag = MagicMock(return_value=False) # type: ignore + + stream_name = "test-stream" + + cumulio_client.batch_write( + stream_name=stream_name, + write_buffer=dummy_data["data"], + column_headers=dummy_data["columns"], + is_in_overwrite_sync_mode=True, + is_first_batch=True, + update_metadata=True, + ) + cumulio_client._push_batch_to_new_dataset.assert_not_called() + cumulio_client._dataset_contains_replace_tag.assert_called_once_with("dataset_id") + cumulio_client._push_batch_to_existing_dataset.assert_called_once_with( + "dataset_id", dummy_data["data"], dummy_data["columns"], True, True + ) + + +def test_batch_write_existing_dataset_first_batch_replace_tag(cumulio_client: CumulioClient, dummy_data: Mapping[str, Any]): + cumulio_client._get_dataset_id_from_stream_name = MagicMock(return_value="dataset_id") # type: ignore + cumulio_client._push_batch_to_new_dataset = MagicMock() # type: ignore + cumulio_client._push_batch_to_existing_dataset = MagicMock() # type: ignore + cumulio_client._dataset_contains_replace_tag = MagicMock(return_value=True) # type: ignore + + stream_name = "test-stream" + + cumulio_client.batch_write( + stream_name=stream_name, + write_buffer=dummy_data["data"], + column_headers=dummy_data["columns"], + is_in_overwrite_sync_mode=False, + is_first_batch=True, + update_metadata=True, + ) + cumulio_client._push_batch_to_new_dataset.assert_not_called() + cumulio_client._dataset_contains_replace_tag.assert_called_once_with("dataset_id") + cumulio_client._push_batch_to_existing_dataset.assert_called_once_with( + "dataset_id", dummy_data["data"], dummy_data["columns"], True, True + ) + + +def test_batch_write_existing_dataset_non_first_batch(cumulio_client: CumulioClient, dummy_data: Mapping[str, Any]): + cumulio_client._get_dataset_id_from_stream_name = MagicMock(return_value="dataset_id") # type: ignore + cumulio_client._push_batch_to_new_dataset = MagicMock() # type: ignore + cumulio_client._push_batch_to_existing_dataset = MagicMock() # type: ignore + cumulio_client._dataset_contains_replace_tag = MagicMock(return_value=True) # type: ignore 
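+ # Even though the dataset carries a replace tag and the sync runs in overwrite mode, is_first_batch=False
+ # means the batch is expected to be pushed in append mode (first_batch_replace=False), as asserted below.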
+ + stream_name = "test-stream" + + cumulio_client.batch_write( + stream_name=stream_name, + write_buffer=dummy_data["data"], + column_headers=dummy_data["columns"], + is_in_overwrite_sync_mode=True, + is_first_batch=False, + update_metadata=True, + ) + cumulio_client._push_batch_to_new_dataset.assert_not_called() + cumulio_client._dataset_contains_replace_tag.assert_called_once_with("dataset_id") + cumulio_client._push_batch_to_existing_dataset.assert_called_once_with( + "dataset_id", dummy_data["data"], dummy_data["columns"], False, True + ) + + +# tests for test_api_token method + + +def test_api_token_unknown_combination(cumulio_client: CumulioClient): + """ "Test that the test_api_token method correctly throws an error upon an invalid combination""" + cumulio_client.client.get = MagicMock(return_value={"count": 0}) + with pytest.raises(Exception): + cumulio_client.test_api_token() + + +def test_api_token_api_call(cumulio_client: CumulioClient): + """ "Test that the test_api_token method makes an API request to the authorization endpoint""" + cumulio_client.client.get = MagicMock(return_value={"count": 1}) + cumulio_client.test_api_token() + cumulio_client.client.get.assert_called_with("authorization", {"where": {"type": "api"}}) + + +def test_test_data_push_method(cumulio_client: CumulioClient, dummy_data: Mapping[str, Any]): + """ "Test that the test_data_push method deletes the dataset afterwards""" + cumulio_client.batch_write = MagicMock() # type: ignore + cumulio_client.delete_dataset = MagicMock() # type: ignore + + stream_name = "test-stream" + + cumulio_client.test_data_push(stream_name, dummy_data["data"], dummy_data["columns"]) + + cumulio_client.delete_dataset.assert_called_once_with("test-stream") + + +# tests for delete_dataset method + + +def test_delete_dataset_no_dataset_found(cumulio_client: CumulioClient): + cumulio_client.client.delete = MagicMock() + cumulio_client._get_dataset_id_from_stream_name = MagicMock(return_value=None) # type: ignore + + cumulio_client.delete_dataset("stream_name") + + # assert that the _get_dataset_id_from_stream_name method was called once with the correct arguments + cumulio_client._get_dataset_id_from_stream_name.assert_called_once_with("stream_name") + + # assert that the client.delete method is not called as no dataset was found + cumulio_client.client.delete.assert_not_called() + + +def test_delete_dataset_dataset_found(cumulio_client: CumulioClient): + cumulio_client.client.delete = MagicMock() + cumulio_client._get_dataset_id_from_stream_name = MagicMock( # type: ignore + return_value="dataset_id" + ) # type: ignore + + cumulio_client.delete_dataset("stream_name") + + # assert that the _get_dataset_id_from_stream_name method was called once with the correct arguments + cumulio_client._get_dataset_id_from_stream_name.assert_called_once_with("stream_name") + + # assert that the client.delete method was called once with the correct arguments + cumulio_client.client.delete.assert_called_once_with("securable", "dataset_id") + + +# tests for get_ordered_columns method + + +def test_get_ordered_columns_dataset_not_created(cumulio_client: CumulioClient): + cumulio_client.get_dataset_and_columns_from_stream_name = MagicMock(return_value=None) # type: ignore + result = cumulio_client.get_ordered_columns("stream_name") + assert result == [] + + +def test_get_ordered_columns_same_order(cumulio_client: CumulioClient): + cumulio_dataset_and_columns = { + "id": "dataset_id", + "columns": [ + {"source_name": "column1", "order": 2}, + 
{"source_name": "column2", "order": 1}, + ], + } + cumulio_client.get_dataset_and_columns_from_stream_name = MagicMock(return_value=cumulio_dataset_and_columns) # type: ignore + result = cumulio_client.get_ordered_columns("stream_name") + assert result == ["column2", "column1"] + + +# tests for _push_batch_to_new_dataset method + + +def test_push_batch_to_new_dataset(cumulio_client: CumulioClient, dummy_data: Mapping[str, Any]): + cumulio_client.client.create = MagicMock(return_value={"rows": [{"id": "new_dataset_id"}]}) + cumulio_client._associate_tag_dataset_id = MagicMock() # type: ignore + + stream_name = "test_stream" + + expected_request_properties = { + "type": "create", + "data": dummy_data["data"], + "options": { + "header": dummy_data["columns"], + "update_metadata": True, + "name": {"en": cumulio_client.INITIAL_DATASET_NAME_PREFIX + stream_name}, + }, + } + cumulio_client._push_batch_to_new_dataset(stream_name, dummy_data["data"], dummy_data["columns"]) + cumulio_client.client.create.assert_called_once_with("data", expected_request_properties) + cumulio_client._associate_tag_dataset_id.assert_called_once_with(stream_name, "new_dataset_id") + + +def test_push_batch_to_new_dataset_all_retries_error(cumulio_client: CumulioClient, dummy_data: Mapping[str, Any]): + cumulio_client.client.create = MagicMock(side_effect=RuntimeError("Internal Server Error")) + stream_name = "test_stream" + + with patch("destination_cumulio.client.time", MagicMock()): + with pytest.raises(Exception): + cumulio_client._push_batch_to_new_dataset(stream_name, dummy_data["data"], dummy_data["columns"]) + + +def test_push_batch_to_new_dataset_first_try_fails(cumulio_client: CumulioClient, dummy_data: Mapping[str, Any]): + effects = iter([RuntimeError("Internal Server Error")]) + + def side_effect(*_): + try: + raise next(effects) + except StopIteration: + return {"rows": [{"id": "new_dataset_id"}]} + + cumulio_client.client.create = MagicMock(side_effect=side_effect) + cumulio_client._associate_tag_dataset_id = MagicMock() # type: ignore + + stream_name = "test_stream" + + expected_request_properties = { + "type": "create", + "data": dummy_data["data"], + "options": { + "header": dummy_data["columns"], + "update_metadata": True, + "name": {"en": cumulio_client.INITIAL_DATASET_NAME_PREFIX + stream_name}, + }, + } + + with patch("destination_cumulio.client.time", MagicMock()): + cumulio_client._push_batch_to_new_dataset(stream_name, dummy_data["data"], dummy_data["columns"]) + cumulio_client.client.create.assert_called_with("data", expected_request_properties) + + assert cumulio_client.client.create.call_count == 2 + + cumulio_client._associate_tag_dataset_id.assert_called_once_with(stream_name, "new_dataset_id") + + +# tests for _push_batch_to_existing_dataset method + + +def test_push_batch_to_existing_dataset_all_retries_error(cumulio_client: CumulioClient, dummy_data: Mapping[str, Any]): + cumulio_client.client.create = MagicMock(side_effect=RuntimeError("Internal Server Error")) + cumulio_client._remove_replace_tag_dataset_id_association = MagicMock() # type: ignore + + dataset_id = "dataset_id" + + with patch("destination_cumulio.client.time", MagicMock()): + with pytest.raises(Exception): + cumulio_client._push_batch_to_existing_dataset(dataset_id, dummy_data["data"], dummy_data["columns"], False, True) + + +def test_push_batch_to_existing_dataset_first_try_fails(cumulio_client: CumulioClient, dummy_data: Mapping[str, Any]): + effects = iter([RuntimeError("Internal Server Error")]) + + def 
side_effect(*_): + try: + raise next(effects) + except StopIteration: + return None + + cumulio_client.client.create = MagicMock(side_effect=side_effect) + cumulio_client._remove_replace_tag_dataset_id_association = MagicMock() # type: ignore + + dataset_id = "dataset_id" + + expected_request_properties = { + "type": "append", + "data": dummy_data["data"], + "securable_id": dataset_id, + "options": { + "header": dummy_data["columns"], + "update_metadata": True, + }, + } + + with patch("destination_cumulio.client.time", MagicMock()): + cumulio_client._push_batch_to_existing_dataset(dataset_id, dummy_data["data"], dummy_data["columns"], False, True) + cumulio_client.client.create.assert_called_with("data", expected_request_properties) + + assert cumulio_client.client.create.call_count == 2 + + cumulio_client._remove_replace_tag_dataset_id_association.assert_not_called() + + +def test_push_batch_to_existing_dataset_no_first_batch_replace(cumulio_client: CumulioClient, dummy_data: Mapping[str, Any]): + cumulio_client.client.create = MagicMock() + cumulio_client._remove_replace_tag_dataset_id_association = MagicMock() # type: ignore + + dataset_id = "dataset_id" + + expected_request_properties = { + "type": "append", + "data": dummy_data["data"], + "securable_id": dataset_id, + "options": { + "header": dummy_data["columns"], + "update_metadata": True, + }, + } + + cumulio_client._push_batch_to_existing_dataset(dataset_id, dummy_data["data"], dummy_data["columns"], False, True) + cumulio_client.client.create.assert_called_once_with("data", expected_request_properties) + cumulio_client._remove_replace_tag_dataset_id_association.assert_not_called() + + +def test_push_batch_to_existing_dataset_first_batch_replace(cumulio_client: CumulioClient, dummy_data: Mapping[str, Any]): + cumulio_client.client.create = MagicMock() + cumulio_client._remove_replace_tag_dataset_id_association = MagicMock() # type: ignore + + dataset_id = "dataset_id" + + expected_request_properties = { + "type": "replace", + "data": dummy_data["data"], + "securable_id": dataset_id, + "options": { + "header": dummy_data["columns"], + "update_metadata": True, + }, + } + + cumulio_client._push_batch_to_existing_dataset(dataset_id, dummy_data["data"], dummy_data["columns"], True, True) + cumulio_client.client.create.assert_called_once_with("data", expected_request_properties) + cumulio_client._remove_replace_tag_dataset_id_association.assert_called_once_with(dataset_id) + + +# tests for _dataset_contains_replace_tag method + + +def test_get_dataset_and_columns_from_stream_name_no_dataset( + cumulio_client: CumulioClient, +): + cumulio_dataset_and_columns_result = {"count": 0, "rows": []} + + # Test when no dataset is found + cumulio_client.client.get = MagicMock(return_value=cumulio_dataset_and_columns_result) + result = cumulio_client.get_dataset_and_columns_from_stream_name("test_stream") + assert result is None + + +def test_get_dataset_and_columns_from_stream_name_single_existing_dataset( + cumulio_client: CumulioClient, +): + cumulio_dataset_and_columns_result: Mapping[str, Any] = { + "count": 1, + "rows": [ + { + "id": "dataset_id", + "columns": [ + {"source_name": "column1", "order": 2}, + {"source_name": "column2", "order": 1}, + ], + } + ], + } + # Test when dataset is found + cumulio_client.client.get = MagicMock(return_value=cumulio_dataset_and_columns_result) + result = cumulio_client.get_dataset_and_columns_from_stream_name("test_stream") + assert result["id"] == cumulio_dataset_and_columns_result["rows"][0]["id"] + 
assert result["columns"] == cumulio_dataset_and_columns_result["rows"][0]["columns"] + + +def test_get_dataset_and_columns_from_stream_name_multiple_existing_datasets( + cumulio_client: CumulioClient, +): + """Tests whether an exception is thrown when multiple datasets are returned for a stream name""" + cumulio_dataset_and_columns_result = { + "count": 2, + "rows": [ + { + "id": "dataset_id_1", + "columns": [ + {"source_name": "column1", "order": 2}, + {"source_name": "column2", "order": 1}, + ], + }, + { + "id": "dataset_id_2", + "columns": [ + {"source_name": "column1", "order": 1}, + {"source_name": "column2", "order": 2}, + ], + }, + ], + } + # Test when multiple datasets are found + cumulio_client.client.get = MagicMock(return_value=cumulio_dataset_and_columns_result) + with pytest.raises(Exception): + cumulio_client.get_dataset_and_columns_from_stream_name("test_stream") + + +# tests for the set_replace_tag_on_dataset method + + +def test_set_replace_tag_on_dataset_no_dataset_found(cumulio_client: CumulioClient): + cumulio_client._get_dataset_id_from_stream_name = MagicMock(return_value=None) # type: ignore + cumulio_client._associate_tag_dataset_id = MagicMock() # type: ignore + + cumulio_client.set_replace_tag_on_dataset("stream_name") + + cumulio_client._get_dataset_id_from_stream_name.assert_called_once_with("stream_name") + cumulio_client._associate_tag_dataset_id.assert_not_called() + + +def test_set_replace_tag_on_dataset_existing_dataset(cumulio_client: CumulioClient): + cumulio_client._get_dataset_id_from_stream_name = MagicMock(return_value="dataset_id") # type: ignore + cumulio_client._associate_tag_dataset_id = MagicMock() # type: ignore + + cumulio_client.set_replace_tag_on_dataset("stream_name") + + cumulio_client._get_dataset_id_from_stream_name.assert_called_once_with("stream_name") + cumulio_client._associate_tag_dataset_id.assert_called_once_with(cumulio_client.REPLACE_TAG, "dataset_id") + + +# tests for _dataset_contains_replace_tag method + + +def test_dataset_contains_replace_tag(cumulio_client: CumulioClient): + dataset_id = "123" + cumulio_client.client.get = MagicMock(return_value={"count": 1}) + assert cumulio_client._dataset_contains_replace_tag(dataset_id) is True + + +def test_dataset_does_not_contain_replace_tag(cumulio_client: CumulioClient): + dataset_id = "123" + cumulio_client.client.get = MagicMock(return_value={"count": 0}) + assert cumulio_client._dataset_contains_replace_tag(dataset_id) is False + + +# tests for _get_dataset_id_from_stream_name method + + +def test_get_dataset_id_from_stream_name_no_dataset(cumulio_client: CumulioClient): + cumulio_client.client.get.return_value = {"count": 0, "rows": []} + dataset_id = cumulio_client._get_dataset_id_from_stream_name("test_stream") + assert dataset_id is None + + +def test_get_dataset_id_from_stream_name_single_dataset(cumulio_client: CumulioClient): + cumulio_client.client.get.return_value = { + "count": 1, + "rows": [{"id": "dataset_id", "name": "Test dataset"}], + } + dataset_id = cumulio_client._get_dataset_id_from_stream_name("test_stream") + assert dataset_id == "dataset_id" + + +def test_get_dataset_id_from_stream_name_multiple_datasets( + cumulio_client: CumulioClient, +): + """Tests whether an exception is thrown when multiple datasets are returned for a stream name""" + cumulio_client.client.get.return_value = { + "count": 2, + "rows": [ + {"id": "dataset_id_1", "name": "Test dataset 1"}, + {"id": "dataset_id_2", "name": "Test dataset 2"}, + ], + } + with pytest.raises(Exception): + 
cumulio_client._get_dataset_id_from_stream_name("test_stream") + + +# tests for _associate_tag_dataset_id method + + +def test_associate_tag_dataset_id_no_tag_found(cumulio_client: CumulioClient): + cumulio_client._get_tag_id = MagicMock(return_value=None) # type: ignore + cumulio_client._create_and_associate_stream_name_tag_with_dataset_id = MagicMock() # type: ignore + cumulio_client._associate_tag_with_dataset_id = MagicMock() # type: ignore + + cumulio_client._associate_tag_dataset_id("test_stream", "test_dataset_id") + + cumulio_client._create_and_associate_stream_name_tag_with_dataset_id.assert_called_once_with("test_stream", "test_dataset_id") + cumulio_client._associate_tag_with_dataset_id.assert_not_called() + + +def test_associate_tag_dataset_id_tag_found(cumulio_client: CumulioClient): + cumulio_client._get_tag_id = MagicMock(return_value="tag_id") # type: ignore + cumulio_client._create_and_associate_stream_name_tag_with_dataset_id = MagicMock() # type: ignore + cumulio_client._associate_tag_with_dataset_id = MagicMock() # type: ignore + + cumulio_client._associate_tag_dataset_id("test_stream", "test_dataset_id") + + cumulio_client._associate_tag_with_dataset_id.assert_called_once_with("tag_id", "test_dataset_id") + cumulio_client._create_and_associate_stream_name_tag_with_dataset_id.assert_not_called() + + +# tests for _get_tag_id method + + +def test_get_tag_id_no_tag_found(cumulio_client: CumulioClient): + tag_api_response = {"count": 0, "rows": []} + cumulio_client.client.get = MagicMock(return_value=tag_api_response) + + result = cumulio_client._get_tag_id("test_stream") + + cumulio_client.client.get.assert_called_once_with("tag", ANY) + assert result is None + + +def test_get_tag_id_tag_found(cumulio_client: CumulioClient): + tag_api_response: Mapping[str, Any] = {"count": 1, "rows": [{"id": "test_tag_id"}]} + cumulio_client.client.get = MagicMock(return_value=tag_api_response) + + result = cumulio_client._get_tag_id("test_stream") + + cumulio_client.client.get.assert_called_once_with("tag", ANY) + assert result == tag_api_response["rows"][0]["id"] diff --git a/airbyte-integrations/connectors/destination-cumulio/unit_tests/test_destination.py b/airbyte-integrations/connectors/destination-cumulio/unit_tests/test_destination.py new file mode 100644 index 0000000..4805fb5 --- /dev/null +++ b/airbyte-integrations/connectors/destination-cumulio/unit_tests/test_destination.py @@ -0,0 +1,155 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
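The tests in this new module lean heavily on `unittest.mock` call recording: they verify both how a collaborator is constructed and which methods are then invoked on it, in order, via `mock_calls`. A tiny self-contained illustration of that pattern (generic, unrelated to Cumul.io specifics):

```python
from unittest.mock import MagicMock, call

class Service:
    def __init__(self, client_factory):
        self.client = client_factory("conn-string")

    def ping(self):
        return self.client.ping()

def test_service_ping():
    factory = MagicMock()
    Service(factory).ping()
    # mock_calls records the constructor-style call and the chained method call, in order.
    assert factory.mock_calls == [call("conn-string"), call().ping()]

test_service_ping()
```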
+# + + +from datetime import datetime +from logging import Logger, getLogger +from typing import Any, Mapping +from unittest.mock import MagicMock, call, patch + +import pytest +from airbyte_cdk.models import ( + AirbyteMessage, + AirbyteRecordMessage, + AirbyteStateMessage, + AirbyteStream, + ConfiguredAirbyteCatalog, + ConfiguredAirbyteStream, + DestinationSyncMode, + SyncMode, + Type, +) +from destination_cumulio.destination import DestinationCumulio + + +@pytest.fixture(name="logger") +def logger_fixture() -> Logger: + return getLogger("airbyte") + + +@pytest.fixture(name="config") +def config_fixture() -> Mapping[str, Any]: + return { + "api_key": "123abc", + "api_token": "456def", + "api_host": "https://api.cumul.io", + } + + +@pytest.fixture(name="configured_catalog") +def configured_catalog_fixture() -> ConfiguredAirbyteCatalog: + stream_schema = { + "type": "object", + "properties": { + "string_column": {"type": "integer"}, + "int_column": {"type": "integer"}, + }, + } + + append_stream = ConfiguredAirbyteStream( + stream=AirbyteStream( + name="append_stream", + json_schema=stream_schema, + supported_sync_modes=[SyncMode.incremental], + ), + sync_mode=SyncMode.incremental, + destination_sync_mode=DestinationSyncMode.append, + ) + + overwrite_stream = ConfiguredAirbyteStream( + stream=AirbyteStream( + name="overwrite_stream", + json_schema=stream_schema, + supported_sync_modes=[SyncMode.incremental], + ), + sync_mode=SyncMode.incremental, + destination_sync_mode=DestinationSyncMode.overwrite, + ) + + return ConfiguredAirbyteCatalog(streams=[append_stream, overwrite_stream]) + + +@pytest.fixture(name="airbyte_message_1") +def airbyte_message_1_fixture() -> AirbyteMessage: + return AirbyteMessage( + type=Type.RECORD, + record=AirbyteRecordMessage( + stream="append_stream", + data={"string_column": "value_1", "int_column": 1}, + emitted_at=int(datetime.now().timestamp()) * 1000, + ), + ) + + +@pytest.fixture(name="airbyte_message_2") +def airbyte_message_2_fixture() -> AirbyteMessage: + return AirbyteMessage( + type=Type.RECORD, + record=AirbyteRecordMessage( + stream="overwrite_stream", + data={"string_column": "value_2", "int_column": 2}, + emitted_at=int(datetime.now().timestamp()) * 1000, + ), + ) + + +@pytest.fixture(name="airbyte_state_message") +def airbyte_state_message_fixture() -> AirbyteMessage: + return AirbyteMessage(type=Type.STATE, state=AirbyteStateMessage(data={})) + + +def test_check(config: Mapping[str, Any], logger: MagicMock): + with patch("destination_cumulio.destination.CumulioClient") as cumulio_client: + destination_cumulio = DestinationCumulio() + destination_cumulio.check(logger, config) + assert cumulio_client.mock_calls == [ + call(config, logger), + call().test_api_token(), + ] + + +def test_write_no_input_messages( + config: Mapping[str, Any], + configured_catalog: ConfiguredAirbyteCatalog, + airbyte_message_1: AirbyteMessage, + airbyte_message_2: AirbyteMessage, + airbyte_state_message: AirbyteMessage, + logger: MagicMock, +): + with patch("destination_cumulio.destination.CumulioWriter") as cumulio_writer: + destination_cumulio = DestinationCumulio() + + input_messages = [airbyte_state_message] + result = list(destination_cumulio.write(config, configured_catalog, input_messages)) + assert result == [airbyte_state_message] + + assert cumulio_writer.mock_calls == [ + call(config, configured_catalog, logger), + call().delete_stream_entries("overwrite_stream"), + call().flush_all(), # The first flush_all is called before yielding the state message + 
call().flush_all(), # The second flush_all is called after going through all input messages + ] + + +def test_write( + config: Mapping[str, Any], + configured_catalog: ConfiguredAirbyteCatalog, + airbyte_message_1: AirbyteMessage, + airbyte_message_2: AirbyteMessage, + airbyte_state_message: AirbyteMessage, + logger: MagicMock, +): + with patch("destination_cumulio.destination.CumulioWriter") as cumulio_writer: + input_messages = [airbyte_message_1, airbyte_message_2, airbyte_state_message] + destination_cumulio = DestinationCumulio() + result = list(destination_cumulio.write(config, configured_catalog, input_messages)) + assert result == [airbyte_state_message] + assert cumulio_writer.mock_calls == [ + call(config, configured_catalog, logger), + call().delete_stream_entries("overwrite_stream"), + call().queue_write_operation("append_stream", {"string_column": "value_1", "int_column": 1}), + call().queue_write_operation("overwrite_stream", {"string_column": "value_2", "int_column": 2}), + call().flush_all(), # The first flush_all is called before yielding the state message + call().flush_all(), # The second flush_all is called after going through all input messages + ] diff --git a/airbyte-integrations/connectors/destination-cumulio/unit_tests/test_writer.py b/airbyte-integrations/connectors/destination-cumulio/unit_tests/test_writer.py new file mode 100644 index 0000000..ac921c7 --- /dev/null +++ b/airbyte-integrations/connectors/destination-cumulio/unit_tests/test_writer.py @@ -0,0 +1,512 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +import unittest +from typing import Any, Mapping +from unittest.mock import MagicMock, patch + +import pytest +from airbyte_cdk.models import AirbyteStream, ConfiguredAirbyteCatalog, ConfiguredAirbyteStream, DestinationSyncMode, SyncMode +from destination_cumulio.writer import CumulioWriter + + +@pytest.fixture(name="logger") +def logger_fixture() -> MagicMock: + return MagicMock() + + +@pytest.fixture(name="config") +def config_fixture() -> Mapping[str, Any]: + return { + "api_key": "123abc", + "api_token": "456def", + "api_host": "https://api.cumul.io", + } + + +@pytest.fixture(name="configured_catalog") +def configured_catalog_fixture() -> ConfiguredAirbyteCatalog: + orders_stream_schema = { + "type": "object", + "properties": { + "order_id": {"type": "integer"}, + "amount": {"type": "integer"}, + "customer_id": {"type": "string"}, + }, + } + products_stream_schema = { + "type": "object", + "properties": {"product_id": {"type": "integer"}}, + } + + orders_append_stream = ConfiguredAirbyteStream( + stream=AirbyteStream( + name="orders", + json_schema=orders_stream_schema, + supported_sync_modes=[SyncMode.incremental], + ), + sync_mode=SyncMode.incremental, + destination_sync_mode=DestinationSyncMode.append, + ) + + products_overwrite_stream = ConfiguredAirbyteStream( + stream=AirbyteStream( + name="products", + json_schema=products_stream_schema, + supported_sync_modes=[SyncMode.incremental], + ), + sync_mode=SyncMode.incremental, + destination_sync_mode=DestinationSyncMode.overwrite, + ) + + return ConfiguredAirbyteCatalog(streams=[orders_append_stream, products_overwrite_stream]) + + +@pytest.fixture(name="writer") +def writer_no_existing_cumulio_columns( + config: Mapping[str, Any], + configured_catalog: ConfiguredAirbyteCatalog, + logger: MagicMock, +) -> CumulioWriter: + """Returns a CumulioWriter using MagicMock, and mocking the return_value of all used CumulioClient methods.""" + with 
patch("destination_cumulio.writer.CumulioClient", MagicMock()) as cumulio_client_mock: + # Mock get_ordered_columns to return no existing Cumul.io columns (dataset hasn't been created yet --> first sync) + cumulio_client_mock.return_value.get_ordered_columns.return_value = [] + # cumulio_client_mock.return_value.batch_write.return_value = None + # cumulio_client_mock.return_value.set_replace_tag_on_dataset.return_value = None + return CumulioWriter(config, configured_catalog, logger) + + +def test_small_enough_data_point_limit(writer: CumulioWriter): + """Tests whether the FLUSH_INTERVAL variable is smaller than the maximum amount of data points Cumul.io supports.""" + assert writer.FLUSH_INTERVAL <= 10000 + + +def test_init(writer: CumulioWriter): + """Tests whether CumulioWriter is correctly initialized for streams with no known Cumulio dataset (i.e. first sync for each stream).""" + + # Assert each stream is correctly initializing writers + assert "orders" in writer.writers + assert "products" in writer.writers + + # Assert each stream is correctly initializing empty write buffer + assert len(writer.writers["orders"]["write_buffer"]) == 0 + assert len(writer.writers["products"]["write_buffer"]) == 0 + + # Assert each stream is correctly initializing is_in_overwrite_sync_mode + assert writer.writers["orders"]["is_in_overwrite_sync_mode"] is False + assert writer.writers["products"]["is_in_overwrite_sync_mode"] is True + + # Assert each stream is correctly initializing is_first_batch to True + assert writer.writers["orders"]["is_first_batch"] is True + assert writer.writers["products"]["is_first_batch"] is True + + # Assert each stream is correctly initializing update_metadata (due to no columns from Cumul.io in this writer, both are True) + assert writer.writers["orders"]["update_metadata"] is True + assert writer.writers["products"]["update_metadata"] is True + + +def test_transform_data(writer: CumulioWriter): + case = unittest.TestCase() + + data = {"order_id": 1, "amount": 100.0, "customer_id": "cust_1"} + transformed_data = writer.transform_data("orders", data) + case.assertCountEqual(transformed_data, ["cust_1", 1, 100.0]) + + +def test_transform_data_missing_data(writer: CumulioWriter): + case = unittest.TestCase() + + missing_data = {"order_id": 1, "customer_id": "cust_1"} + transformed_data = writer.transform_data("orders", missing_data) + case.assertCountEqual(transformed_data, ["cust_1", 1, None]) + + +def test_transform_data_additional_data(writer: CumulioWriter): + case = unittest.TestCase() + + additional_data = { + "order_id": 1, + "amount": 100.0, + "customer_id": "cust_1", + "custmer_name": "Customer 1", + } + transformed_data = writer.transform_data("orders", additional_data) + case.assertCountEqual(transformed_data, ["cust_1", 1, 100.0]) + + +def test_transform_data_bool_data(writer: CumulioWriter): + case = unittest.TestCase() + + bool_data = {"order_id": 1, "amount": 100.0, "customer_id": True} + transformed_data = writer.transform_data("orders", bool_data) + case.assertCountEqual(transformed_data, ["true", 1, 100.0]) + + +def test_transform_data_dict_data(writer: CumulioWriter): + case = unittest.TestCase() + + dict_data = {"order_id": 1, "amount": 100.0, "customer_id": {"key": "value"}} + transformed_data = writer.transform_data("orders", dict_data) + case.assertCountEqual(transformed_data, ['{"key": "value"}', 1, 100.0]) + + +def test_transform_data_arr_data(writer: CumulioWriter): + case = unittest.TestCase() + + arr_data = {"order_id": 1, "amount": 100.0, 
"customer_id": ["test1", "test2"]} + transformed_data = writer.transform_data("orders", arr_data) + case.assertCountEqual(transformed_data, ['["test1", "test2"]', 1, 100.0]) + + +def test_queue_write_operation(writer: CumulioWriter): + # Set flush interval to max value to avoid flushing data + writer.FLUSH_INTERVAL = 10000 + + writer.client.batch_write = MagicMock() # type: ignore + + case = unittest.TestCase() + + order_data = {"order_id": 1, "amount": 100.0, "customer_id": "customer_1"} + writer.queue_write_operation("orders", order_data) + + # Assert that write_buffer from the orders stream contains a single value + assert len(writer.writers["orders"]["write_buffer"]) == 1 + case.assertCountEqual(writer.writers["orders"]["write_buffer"][0], ["customer_1", 1, 100.0]) + + +def test_queue_write_operation_two_streams(writer: CumulioWriter): + # Set flush interval to max value to avoid flushing data + writer.FLUSH_INTERVAL = 10000 + + writer.client.batch_write = MagicMock() # type: ignore + + order_data = {"order_id": 1, "amount": 100.0, "customer_id": "customer_1"} + writer.queue_write_operation("orders", order_data) + + # Assert that write_buffer from the orders stream contains a single value + assert len(writer.writers["orders"]["write_buffer"]) == 1 + + product_data = {"product_id": 1} + writer.queue_write_operation("products", product_data) + + # Assert that the orders write_buffer isn't influenced by write operations from the products stream + assert len(writer.writers["orders"]["write_buffer"]) == 1 + + # Assert that write_buffer from the products stream contains a single value + assert len(writer.writers["products"]["write_buffer"]) == 1 + assert writer.writers["products"]["write_buffer"] == [[1]] + + product_data = {"product_id": 2} + writer.queue_write_operation("products", product_data) + # Assert that write_buffer from the orders stream contains two values + assert writer.writers["products"]["write_buffer"] == [[1], [2]] + + +def test_queue_write_operation_non_existing_stream(writer: CumulioWriter): + # Set flush interval to max value to avoid flushing data + writer.FLUSH_INTERVAL = 10000 + + writer.client.batch_write = MagicMock() # type: ignore + + with pytest.raises(Exception): + # Assert that an Exception is thrown upon trying to write to a non-existing stream + writer.queue_write_operation("non_existing_stream", {"column": "value"}) + + +def test_flush(writer: CumulioWriter): + writer.client.batch_write = MagicMock() # type: ignore + + writer.writers["orders"]["write_buffer"] = [["customer_1", 1, 100.0]] + writer.flush("orders") + assert writer.writers["orders"]["write_buffer"] == [] + + +def test_queue_write_flush_operation(writer: CumulioWriter): + # Set flush interval to 2 to cause flush after second row has been added to buffer + writer.FLUSH_INTERVAL = 2 + + writer.client.batch_write = MagicMock() # type: ignore + + product_data = {"product_id": 1} + writer.queue_write_operation("products", product_data) + assert writer.writers["products"]["write_buffer"] == [[1]] + + product_data = {"product_id": 2} + writer.queue_write_operation("products", product_data) + assert writer.writers["products"]["write_buffer"] == [] + assert writer.writers["products"]["is_first_batch"] is False + + product_data = {"product_id": 3} + writer.queue_write_operation("products", product_data) + assert writer.writers["products"]["write_buffer"] == [[3]] + + +def test_flush_all(writer: CumulioWriter): + writer.client.batch_write = MagicMock() # type: ignore + + 
writer.writers["orders"]["write_buffer"] = [["cust_1", 1, 100.0]] + writer.writers["products"]["write_buffer"] = [["cust_1", 1, 100.0]] + writer.flush_all() + assert writer.writers["orders"]["write_buffer"] == [] + assert writer.writers["products"]["write_buffer"] == [] + + +def test_delete_stream_entries(writer: CumulioWriter): + writer.client.set_replace_tag_on_dataset = MagicMock() # type: ignore + writer.delete_stream_entries("stream_name") + writer.client.set_replace_tag_on_dataset.assert_called_once_with("stream_name") + + +def _get_cumulio_and_merged_columns(writer: CumulioWriter) -> Mapping[str, Any]: + if len(writer.writers) < 0: + raise Exception("No streams defined for writer") + + result = {} + + for stream_name in writer.writers: + cumulio_columns = writer.client.get_ordered_columns(stream_name) + merged_columns = writer.writers[stream_name]["column_headers"] + result[stream_name] = { + "cumulio_columns": cumulio_columns, + "merged_columns": merged_columns, + } + return result + + +@pytest.fixture +def writer_existing_cumulio_columns( + config: Mapping[str, Any], + configured_catalog: ConfiguredAirbyteCatalog, + logger: MagicMock, +) -> CumulioWriter: + """This will return a CumulioWriter that mocks airbyte stream catalogs that contains the same columns as those existing in Cumul.io.""" + existing_cumulio_columns = {} + for configured_stream in configured_catalog.streams: + existing_cumulio_columns[configured_stream.stream.name] = [ + column_name for column_name in configured_stream.stream.json_schema["properties"] + ] + + def get_existing_cumulio_columns(stream_name): + return existing_cumulio_columns[stream_name] + + with patch("destination_cumulio.writer.CumulioClient", MagicMock()) as cumulio_client_mock: + # Mock get_ordered_columns to return existing_cumulio_columns + cumulio_client_mock.return_value.get_ordered_columns = MagicMock(side_effect=get_existing_cumulio_columns) + return CumulioWriter(config, configured_catalog, logger) + + +def test_init_existing_cumulio_columns(writer_existing_cumulio_columns: CumulioWriter): + """Tests whether each stream is correctly initializing update_metadata. + Due to identical columns in Cumul.io for this writer, both are False. + """ + assert writer_existing_cumulio_columns.writers["orders"]["update_metadata"] is False + assert writer_existing_cumulio_columns.writers["products"]["update_metadata"] is False + + +def test_equal_cumulio_and_merged_columns( + writer_existing_cumulio_columns: CumulioWriter, +): + result = _get_cumulio_and_merged_columns(writer_existing_cumulio_columns) + + for stream_name in result: + for index, column in enumerate(result[stream_name]["merged_columns"]): + # Assert that merged_columns are in same order as columns defined on Cumul.io's side. 
+ assert result[stream_name]["cumulio_columns"][index] == column["name"] + + +def test_queue_write_operation_with_correct_data_order( + writer_existing_cumulio_columns: CumulioWriter, +): + writer_existing_cumulio_columns.client.batch_write = MagicMock() # type: ignore + + result = _get_cumulio_and_merged_columns(writer_existing_cumulio_columns) + # Set flush interval to max value to avoid flushing data + writer_existing_cumulio_columns.FLUSH_INTERVAL = 10000 + + order_data = {"order_id": 1, "amount": 100.0, "customer_id": "cust_1"} + writer_existing_cumulio_columns.queue_write_operation("orders", order_data) + expected_data = [] + for column in result["orders"]["merged_columns"]: + expected_data.append(order_data[column["name"]]) + assert writer_existing_cumulio_columns.writers["orders"]["write_buffer"][0] == expected_data + + +@pytest.fixture(name="configured_catalog_with_new_column") +def configured_catalog_with_new_column_fixture() -> ConfiguredAirbyteCatalog: + """Creates a ConfiguredAirbyteCatalog that will be used to mock a new column.""" + # The stream should have at least 2 schema properties (i.e. columns) defined. + orders_stream_schema = { + "type": "object", + "properties": { + "order_id": {"type": "integer"}, + "amount": {"type": "integer"}, + "customer_id": {"type": "string"}, + "customer_name": {"type": "string"}, + }, + } + + orders_append_stream = ConfiguredAirbyteStream( + stream=AirbyteStream( + name="orders_append", + json_schema=orders_stream_schema, + supported_sync_modes=[SyncMode.incremental], + ), + sync_mode=SyncMode.incremental, + destination_sync_mode=DestinationSyncMode.append, + ) + + orders_overwrite_stream = ConfiguredAirbyteStream( + stream=AirbyteStream( + name="orders_overwrite", + json_schema=orders_stream_schema, + supported_sync_modes=[SyncMode.incremental], + ), + sync_mode=SyncMode.incremental, + destination_sync_mode=DestinationSyncMode.overwrite, + ) + + return ConfiguredAirbyteCatalog(streams=[orders_append_stream, orders_overwrite_stream]) + + +@pytest.fixture +def writer_new_airbyte_column( + config: Mapping[str, Any], + configured_catalog_with_new_column: ConfiguredAirbyteCatalog, + logger: MagicMock, +) -> CumulioWriter: + """This will return a CumulioWriter that mocks airbyte stream catalogs that contains one column that does not exist in Cumul.io.""" + existing_cumulio_columns = {} + for configured_stream in configured_catalog_with_new_column.streams: + columns = [column_name for column_name in configured_stream.stream.json_schema["properties"]] + # get rid of the second element to mimic a new column being defined in configured_stream + del columns[1] + existing_cumulio_columns[configured_stream.stream.name] = columns + + def get_existing_cumulio_columns(stream_name): + return existing_cumulio_columns[stream_name] + + with patch("destination_cumulio.writer.CumulioClient", MagicMock()) as cumulio_client_mock: + # Mock get_ordered_columns to return existing_cumulio_columns (which does not include one column defined in configured stream) + cumulio_client_mock.return_value.get_ordered_columns = MagicMock(side_effect=get_existing_cumulio_columns) + cumulio_client_mock.return_value.batch_writer.return_value = None + cumulio_client_mock.return_value.set_replace_tag_on_dataset.return_value = None + return CumulioWriter(config, configured_catalog_with_new_column, logger) + + +def test_init_new_airbyte_column(writer_new_airbyte_column: CumulioWriter): + """Tests whether each stream is correctly initializing update_metadata (due to new Column in 
Airbyte for this writer, both are True)""" + assert writer_new_airbyte_column.writers["orders_append"]["update_metadata"] is True + assert writer_new_airbyte_column.writers["orders_overwrite"]["update_metadata"] is True + + +def test_new_column_update_metadata(writer_new_airbyte_column: CumulioWriter): + """Tests whether Airbyte streams with at least one new column defined results in update_metadata, + to inform Cumul.io about new column data being pushed.""" + for stream_name in writer_new_airbyte_column.writers: + assert writer_new_airbyte_column.writers[stream_name]["update_metadata"] is True + + +def test_new_column_appended(writer_new_airbyte_column: CumulioWriter): + """Tests whether the Airbyte streams with one new column appends it at the end of the column list""" + result = _get_cumulio_and_merged_columns(writer_new_airbyte_column) + for stream_name in result: + assert len(result[stream_name]["merged_columns"]) == len(result[stream_name]["cumulio_columns"]) + 1 + for index, column in enumerate(result[stream_name]["cumulio_columns"]): + # Assert that merged_columns are in same order as columns defined on Cumul.io's side. + assert result[stream_name]["merged_columns"][index]["name"] == column + with pytest.raises(Exception): + # Test whether last element of merged_columns is the column that is not defined on Cumul.io's end. + result[stream_name]["cumulio_columns"].index(result[stream_name]["merged_columns"][-1]["name"]) + + +@pytest.fixture(name="configured_catalog_with_deleted_column") +def configured_catalog_with_deleted_column_fixture() -> ConfiguredAirbyteCatalog: + """Creates a ConfiguredAirbyteCatalog that will be used to mock a deleted column.""" + orders_stream_schema = { + "type": "object", + "properties": {"order_id": {"type": "integer"}, "amount": {"type": "integer"}}, + } + + orders_append_stream = ConfiguredAirbyteStream( + stream=AirbyteStream( + name="orders_append", + json_schema=orders_stream_schema, + supported_sync_modes=[SyncMode.incremental], + ), + sync_mode=SyncMode.incremental, + destination_sync_mode=DestinationSyncMode.append, + ) + + orders_overwrite_stream = ConfiguredAirbyteStream( + stream=AirbyteStream( + name="orders_overwrite", + json_schema=orders_stream_schema, + supported_sync_modes=[SyncMode.incremental], + ), + sync_mode=SyncMode.incremental, + destination_sync_mode=DestinationSyncMode.overwrite, + ) + + return ConfiguredAirbyteCatalog(streams=[orders_append_stream, orders_overwrite_stream]) + + +@pytest.fixture +def writer_deleted_airbyte_column( + config: Mapping[str, Any], + configured_catalog_with_deleted_column: ConfiguredAirbyteCatalog, + logger: MagicMock, +) -> CumulioWriter: + """This will return a CumulioWriter that mocks airbyte stream catalogs that doesn't contain one column that does exist in Cumul.io.""" + existing_cumulio_columns = {} + for configured_stream in configured_catalog_with_deleted_column.streams: + columns = [column_name for column_name in configured_stream.stream.json_schema["properties"]] + # Add customer_name column as second element to mimic a deleted column being defined in configured_stream + columns.insert(1, "customer_name") + existing_cumulio_columns[configured_stream.stream.name] = columns + + def get_existing_cumulio_columns(stream_name): + return existing_cumulio_columns[stream_name] + + with patch("destination_cumulio.writer.CumulioClient", MagicMock()) as cumulio_client_mock: + # Mock get_ordered_columns to return existing_cumulio_columns (which does not include one column defined in configured stream) + 
cumulio_client_mock.return_value.get_ordered_columns = MagicMock(side_effect=get_existing_cumulio_columns) + cumulio_client_mock.return_value.batch_writer.return_value = None + cumulio_client_mock.return_value.set_replace_tag_on_dataset.return_value = None + return CumulioWriter(config, configured_catalog_with_deleted_column, logger) + + +def test_init_deleted_airbyte_column(writer_deleted_airbyte_column: CumulioWriter): + """Assert each stream is correctly initializing update_metadata. + Due to deleted Column in Airbyte for this writer: + - the update_metadata property for the orders dataset is set to False, as it's in append mode and thus should keep existing structure + - the update_metadata property for the orders dataset is set to True, as it's in overwrite mode + """ + assert writer_deleted_airbyte_column.writers["orders_append"]["update_metadata"] is False + assert writer_deleted_airbyte_column.writers["orders_overwrite"]["update_metadata"] is True + + +def test_deleted_column_update_metadata(writer_deleted_airbyte_column: CumulioWriter): + """Tests whether Airbyte streams that do not contain a column defined on Cumul.io's side results in update_metadata for only + overwrite streams (to inform Cumul.io about new column data being pushed)""" + assert writer_deleted_airbyte_column.writers["orders_append"]["update_metadata"] is False + assert writer_deleted_airbyte_column.writers["orders_overwrite"]["update_metadata"] is True + + +def test_merged_columns_order_for_deleted_column( + writer_deleted_airbyte_column: CumulioWriter, +): + """Tests whether Airbyte streams that do not contain a column defined on Cumul.io's side still correctly puts the other columns in + the right order""" + result = _get_cumulio_and_merged_columns(writer_deleted_airbyte_column) + for stream_name in result: + # Test whether merged_columns contains one less element + assert len(result[stream_name]["merged_columns"]) == len(result[stream_name]["cumulio_columns"]) - 1 + + cumulio_columns_without_deleted = [ + column_name for column_name in result[stream_name]["cumulio_columns"] if column_name != "customer_name" + ] + # Test whether elements, without deleted column, are equal and in the same position + assert cumulio_columns_without_deleted == [column["name"] for column in result[stream_name]["merged_columns"]] diff --git a/airbyte-integrations/connectors/destination-databend/.dockerignore b/airbyte-integrations/connectors/destination-databend/.dockerignore new file mode 100644 index 0000000..57f4cf3 --- /dev/null +++ b/airbyte-integrations/connectors/destination-databend/.dockerignore @@ -0,0 +1,5 @@ +* +!Dockerfile +!main.py +!destination_databend +!setup.py diff --git a/airbyte-integrations/connectors/destination-kvdb/Dockerfile b/airbyte-integrations/connectors/destination-databend/Dockerfile similarity index 90% rename from airbyte-integrations/connectors/destination-kvdb/Dockerfile rename to airbyte-integrations/connectors/destination-databend/Dockerfile index 31fd4d4..df2af68 100644 --- a/airbyte-integrations/connectors/destination-kvdb/Dockerfile +++ b/airbyte-integrations/connectors/destination-databend/Dockerfile @@ -29,10 +29,10 @@ RUN apk --no-cache add bash # copy payload code only COPY main.py ./ -COPY destination_kvdb ./destination_kvdb +COPY destination_databend ./destination_databend ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] LABEL io.airbyte.version=0.1.2 -LABEL io.airbyte.name=airbyte/destination-kvdb +LABEL 
io.airbyte.name=airbyte/destination-databend diff --git a/airbyte-integrations/connectors/destination-databend/README.md b/airbyte-integrations/connectors/destination-databend/README.md new file mode 100644 index 0000000..9b50cd9 --- /dev/null +++ b/airbyte-integrations/connectors/destination-databend/README.md @@ -0,0 +1,99 @@ +# Databend Destination + +This is the repository for the Databend destination connector, written in Python. +For information about how to use this connector within Airbyte, see [the documentation](https://docs.airbyte.io/integrations/destinations/databend). + +## Local development + +### Prerequisites +**To iterate on this connector, make sure to complete this prerequisites section.** + +#### Minimum Python version required `= 3.7.0` + +#### Build & Activate Virtual Environment and install dependencies +From this connector directory, create a virtual environment: +``` +python -m venv .venv +``` + +This will generate a virtualenv for this module in `.venv/`. Make sure this venv is active in your +development environment of choice. To activate it from the terminal, run: +``` +source .venv/bin/activate +pip install -r requirements.txt +``` +If you are in an IDE, follow your IDE's instructions to activate the virtualenv. + +Note that while we are installing dependencies from `requirements.txt`, you should only edit `setup.py` for your dependencies. `requirements.txt` is +used for editable installs (`pip install -e`) to pull in Python dependencies from the monorepo and will call `setup.py`. +If this is mumbo jumbo to you, don't worry about it, just put your deps in `setup.py` but install using `pip install -r requirements.txt` and everything +should work as you expect. + +#### Create credentials +**If you are a community contributor**, follow the instructions in the [documentation](https://docs.airbyte.io/integrations/destinations/databend) +to generate the necessary credentials. Then create a file `secrets/config.json` conforming to the `destination_databend/spec.json` file. +Note that the `secrets` directory is gitignored by default, so there is no danger of accidentally checking in sensitive information. +See `integration_tests/sample_config.json` for a sample config file. + +**If you are an Airbyte core member**, copy the credentials in Lastpass under the secret name `destination databend test creds` +and place them into `secrets/config.json`. + +### Locally running the connector +``` +python main.py spec +python main.py check --config secrets/config.json +python main.py discover --config secrets/config.json +python main.py read --config secrets/config.json --catalog integration_tests/configured_catalog.json +``` + +### Locally running the connector docker image + + +#### Build +**Via [`airbyte-ci`](https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/pipelines/README.md) (recommended):** +```bash +airbyte-ci connectors --name=destination-databend build +``` + +An image will be built with the tag `airbyte/destination-databend:dev`. + +**Via `docker build`:** +```bash +docker build -t airbyte/destination-databend:dev . 
+``` + +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/destination-databend:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-databend:dev check --config /secrets/config.json +# messages.jsonl is a file containing line-separated JSON representing AirbyteMessages +cat messages.jsonl | docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/destination-databend:dev write --config /secrets/config.json --catalog /integration_tests/configured_catalog.json +``` + +## Testing +You can run our full test suite locally using [`airbyte-ci`](https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/pipelines/README.md): +```bash +airbyte-ci connectors --name=destination-databend test +``` + +### Customizing acceptance Tests +Customize `acceptance-test-config.yml` file to configure tests. See [Connector Acceptance Tests](https://docs.airbyte.com/connector-development/testing-connectors/connector-acceptance-tests-reference) for more information. +If your connector requires to create or destroy resources for use during acceptance tests create fixtures for it and place them inside integration_tests/acceptance.py. + +## Dependency Management +All of your dependencies should go in `setup.py`, NOT `requirements.txt`. The requirements file is only used to connect internal Airbyte dependencies in the monorepo for local development. +We split dependencies between two groups, dependencies that are: +* required for your connector to work need to go to `MAIN_REQUIREMENTS` list. +* required for the testing need to go to `TEST_REQUIREMENTS` list + +### Publishing a new version of the connector +You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what? +1. Make sure your changes are passing our test suite: `airbyte-ci connectors --name=destination-databend test` +2. Bump the connector version in `metadata.yaml`: increment the `dockerImageTag` value. Please follow [semantic versioning for connectors](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#semantic-versioning-for-connectors). +3. Make sure the `metadata.yaml` content is up to date. +4. Make the connector documentation and its changelog is up to date (`docs/integrations/destinations/databend.md`). +5. Create a Pull Request: use [our PR naming conventions](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#pull-request-title-convention). +6. Pat yourself on the back for being an awesome contributor. +7. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master. + diff --git a/airbyte-integrations/connectors/destination-databend/destination_databend/__init__.py b/airbyte-integrations/connectors/destination-databend/destination_databend/__init__.py new file mode 100644 index 0000000..5be4069 --- /dev/null +++ b/airbyte-integrations/connectors/destination-databend/destination_databend/__init__.py @@ -0,0 +1,8 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
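The README above lists `python main.py check --config secrets/config.json` for local runs. The same check can also be driven from a Python shell once this package is installed; a hedged sketch (it assumes a valid `secrets/config.json`, as described in the README):

```python
import json
import logging

from destination_databend import DestinationDatabend

config = json.loads(open("secrets/config.json").read())
result = DestinationDatabend().check(logging.getLogger("airbyte"), config)
print(result.status)  # Status.SUCCEEDED when the credentials can create and drop a scratch table
```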
+# + + +from .destination import DestinationDatabend + +__all__ = ["DestinationDatabend"] diff --git a/airbyte-integrations/connectors/destination-databend/destination_databend/__init__.pyc b/airbyte-integrations/connectors/destination-databend/destination_databend/__init__.pyc new file mode 100644 index 0000000..4538a0a Binary files /dev/null and b/airbyte-integrations/connectors/destination-databend/destination_databend/__init__.pyc differ diff --git a/airbyte-integrations/connectors/destination-databend/destination_databend/client.py b/airbyte-integrations/connectors/destination-databend/destination_databend/client.py new file mode 100644 index 0000000..1764093 --- /dev/null +++ b/airbyte-integrations/connectors/destination-databend/destination_databend/client.py @@ -0,0 +1,20 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + +from databend_sqlalchemy import connector + + +class DatabendClient: + def __init__(self, host: str, port: int, database: str, table: str, username: str, password: str = None): + self.host = host + self.port = port + self.database = database + self.table = table + self.username = username + self.password = password + + def open(self): + handle = connector.connect(f"https://{self.username}:{self.password}@{self.host}:{self.port}/{self.database}").cursor() + + return handle diff --git a/airbyte-integrations/connectors/destination-databend/destination_databend/destination.py b/airbyte-integrations/connectors/destination-databend/destination_databend/destination.py new file mode 100644 index 0000000..365575e --- /dev/null +++ b/airbyte-integrations/connectors/destination-databend/destination_databend/destination.py @@ -0,0 +1,89 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +import json +from datetime import datetime +from logging import getLogger +from typing import Any, Iterable, Mapping +from uuid import uuid4 + +from airbyte_cdk import AirbyteLogger +from airbyte_cdk.destinations import Destination +from airbyte_cdk.models import AirbyteConnectionStatus, AirbyteMessage, ConfiguredAirbyteCatalog, DestinationSyncMode, Status, Type +from destination_databend.client import DatabendClient + +from .writer import create_databend_wirter + +logger = getLogger("airbyte") + + +class DestinationDatabend(Destination): + def write( + self, config: Mapping[str, Any], configured_catalog: ConfiguredAirbyteCatalog, input_messages: Iterable[AirbyteMessage] + ) -> Iterable[AirbyteMessage]: + + """ + TODO + Reads the input stream of messages, config, and catalog to write data to the destination. + + This method returns an iterable (typically a generator of AirbyteMessages via yield) containing state messages received + in the input message stream. Outputting a state message means that every AirbyteRecordMessage which came before it has been + successfully persisted to the destination. This is used to ensure fault tolerance in the case that a sync fails before fully completing, + then the source is given the last state message output from this method as the starting point of the next sync. 
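A minimal, destination-agnostic sketch of that state-message contract (illustrative only; the `persist` and `flush` helpers are placeholders, and the concrete implementation below organizes its flushing differently):

```python
from typing import Callable, Iterable

from airbyte_cdk.models import AirbyteMessage, Type

def checkpointing_write(
    input_messages: Iterable[AirbyteMessage],
    persist: Callable[[str, dict], None],    # buffer/write one record for a stream
    flush: Callable[[], None],               # make everything buffered durable
) -> Iterable[AirbyteMessage]:
    for message in input_messages:
        if message.type == Type.STATE:
            flush()        # everything received before this state message is now persisted
            yield message  # safe to checkpoint
        elif message.type == Type.RECORD:
            persist(message.record.stream, message.record.data)
    flush()                # persist whatever is left after the final message
```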
+ + :param config: dict of JSON configuration matching the configuration declared in spec.json + :param configured_catalog: The Configured Catalog describing the schema of the data being received and how it should be persisted in the + destination + :param input_messages: The stream of input messages received from the source + :return: Iterable of AirbyteStateMessages wrapped in AirbyteMessage structs + """ + streams = {s.stream.name for s in configured_catalog.streams} + client = DatabendClient(**config) + + writer = create_databend_wirter(client, logger) + + for configured_stream in configured_catalog.streams: + if configured_stream.destination_sync_mode == DestinationSyncMode.overwrite: + writer.delete_table(configured_stream.stream.name) + logger.info(f"Stream {configured_stream.stream.name} is wiped.") + writer.create_raw_table(configured_stream.stream.name) + + for message in input_messages: + if message.type == Type.STATE: + yield message + elif message.type == Type.RECORD: + data = message.record.data + stream = message.record.stream + # Skip unselected streams + if stream not in streams: + logger.debug(f"Stream {stream} was not present in configured streams, skipping") + continue + writer.queue_write_data(stream, str(uuid4()), datetime.now(), json.dumps(data)) + + # Flush any leftover messages + writer.flush() + + def check(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> AirbyteConnectionStatus: + """ + Tests if the input configuration can be used to successfully connect to the destination with the needed permissions + e.g: if a provided API token or password can be used to connect and write to the destination. + + :param logger: Logging object to display debug/info/error to the logs + (logs will not be accessible via airbyte UI if they are not passed to this logger) + :param config: Json object containing the configuration of this destination, content of this json is as specified in + the properties of the spec.json file + + :return: AirbyteConnectionStatus indicating a Success or Failure + """ + try: + client = DatabendClient(**config) + cursor = client.open() + cursor.execute("DROP TABLE IF EXISTS test") + cursor.execute("CREATE TABLE if not exists test (x Int32,y VARCHAR)") + cursor.execute("INSERT INTO test (x,y) VALUES (%,%)", [1, "yy", 2, "xx"]) + cursor.execute("DROP TABLE IF EXISTS test") + return AirbyteConnectionStatus(status=Status.SUCCEEDED) + except Exception as e: + return AirbyteConnectionStatus(status=Status.FAILED, message=f"An exception occurred: {repr(e)}") diff --git a/airbyte-integrations/connectors/destination-databend/destination_databend/spec.json b/airbyte-integrations/connectors/destination-databend/destination_databend/spec.json new file mode 100644 index 0000000..e77d330 --- /dev/null +++ b/airbyte-integrations/connectors/destination-databend/destination_databend/spec.json @@ -0,0 +1,57 @@ +{ + "documentationUrl": "https://docs.airbyte.com/integrations/destinations/databend", + "supported_destination_sync_modes": ["overwrite", "append"], + "supportsIncremental": true, + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Destination Databend", + "type": "object", + "required": ["host", "username", "database"], + "additionalProperties": true, + "properties": { + "host": { + "title": "Host", + "description": "Hostname of the database.", + "type": "string", + "order": 0 + }, + "port": { + "title": "Port", + "description": "Port of the database.", + "type": "integer", + "minimum": 0, + "maximum": 
65536, + "default": 443, + "examples": ["443"], + "order": 2 + }, + "database": { + "title": "DB Name", + "description": "Name of the database.", + "type": "string", + "order": 3 + }, + "table": { + "title": "Default Table", + "description": "The default table was written to.", + "type": "string", + "examples": ["default"], + "default": "default", + "order": 4 + }, + "username": { + "title": "User", + "description": "Username to use to access the database.", + "type": "string", + "order": 5 + }, + "password": { + "title": "Password", + "description": "Password associated with the username.", + "type": "string", + "airbyte_secret": true, + "order": 6 + } + } + } +} diff --git a/airbyte-integrations/connectors/destination-databend/destination_databend/writer.py b/airbyte-integrations/connectors/destination-databend/destination_databend/writer.py new file mode 100644 index 0000000..006ff96 --- /dev/null +++ b/airbyte-integrations/connectors/destination-databend/destination_databend/writer.py @@ -0,0 +1,134 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + +from collections import defaultdict +from datetime import datetime +from itertools import chain + +from airbyte_cdk import AirbyteLogger +from airbyte_cdk.models import AirbyteConnectionStatus, Status +from destination_databend.client import DatabendClient + + +class DatabendWriter: + """ + Base class for shared writer logic. + """ + + flush_interval = 1000 + + def __init__(self, client: DatabendClient) -> None: + """ + :param client: Databend SDK connection class with established connection + to the databse. + """ + try: + # open a cursor and do some work with it + self.client = client + self.cursor = client.open() + self._buffer = defaultdict(list) + self._values = 0 + except Exception as e: + # handle the exception + raise AirbyteConnectionStatus(status=Status.FAILED, message=f"An exception occurred: {repr(e)}") + finally: + # close the cursor + self.cursor.close() + + def delete_table(self, name: str) -> None: + """ + Delete the resulting table. + Primarily used in Overwrite strategy to clean up previous data. + + :param name: table name to delete. + """ + self.cursor.execute(f"DROP TABLE IF EXISTS _airbyte_raw_{name}") + + def create_raw_table(self, name: str): + """ + Create the resulting _airbyte_raw table. + + :param name: table name to create. + """ + query = f""" + CREATE TABLE IF NOT EXISTS _airbyte_raw_{name} ( + _airbyte_ab_id TEXT, + _airbyte_emitted_at TIMESTAMP, + _airbyte_data TEXT + ) + """ + cursor = self.cursor + cursor.execute(query) + + def queue_write_data(self, stream_name: str, id: str, time: datetime, record: str) -> None: + """ + Queue up data in a buffer in memory before writing to the database. + When flush_interval is reached data is persisted. + + :param stream_name: name of the stream for which the data corresponds. + :param id: unique identifier of this data row. + :param time: time of writing. + :param record: string representation of the json data payload. + """ + self._buffer[stream_name].append((id, time, record)) + self._values += 1 + if self._values == self.flush_interval: + self._flush() + + def _flush(self): + """ + Stub for the intermediate data flush that's triggered during the + buffering operation. + """ + raise NotImplementedError() + + def flush(self): + """ + Stub for the data flush at the end of writing operation. + """ + raise NotImplementedError() + + +class DatabendSQLWriter(DatabendWriter): + """ + Data writer using the SQL writing strategy. 
Data is buffered in memory + and flushed using INSERT INTO SQL statement. + """ + + flush_interval = 1000 + + def __init__(self, client: DatabendClient) -> None: + """ + :param client: Databend SDK connection class with established connection + to the databse. + """ + super().__init__(client) + + def _flush(self) -> None: + """ + Intermediate data flush that's triggered during the + buffering operation. Writes data stored in memory via SQL commands. + databend connector insert into table using stage + """ + cursor = self.cursor + # id, written_at, data + for table, data in self._buffer.items(): + cursor.execute( + f"INSERT INTO _airbyte_raw_{table} (_airbyte_ab_id,_airbyte_emitted_at,_airbyte_data) VALUES (%, %, %)", + list(chain.from_iterable(data)), + ) + self._buffer.clear() + self._values = 0 + + def flush(self) -> None: + """ + Final data flush after all data has been written to memory. + """ + self._flush() + + +def create_databend_wirter(client: DatabendClient, logger: AirbyteLogger) -> DatabendWriter: + logger.info("Using the SQL writing strategy") + writer = DatabendSQLWriter(client) + return writer diff --git a/airbyte-integrations/connectors/destination-databend/icon.svg b/airbyte-integrations/connectors/destination-databend/icon.svg new file mode 100644 index 0000000..b6afca7 --- /dev/null +++ b/airbyte-integrations/connectors/destination-databend/icon.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/airbyte-integrations/connectors/destination-kvdb/integration_tests/integration_test.py b/airbyte-integrations/connectors/destination-databend/integration_tests/integration_test.py similarity index 62% rename from airbyte-integrations/connectors/destination-kvdb/integration_tests/integration_test.py rename to airbyte-integrations/connectors/destination-databend/integration_tests/integration_test.py index 5e083ac..a40494c 100644 --- a/airbyte-integrations/connectors/destination-kvdb/integration_tests/integration_test.py +++ b/airbyte-integrations/connectors/destination-databend/integration_tests/integration_test.py @@ -3,10 +3,10 @@ # import json +import logging from typing import Any, Dict, List, Mapping import pytest -from airbyte_cdk import AirbyteLogger from airbyte_cdk.models import ( AirbyteMessage, AirbyteRecordMessage, @@ -19,11 +19,11 @@ SyncMode, Type, ) -from destination_kvdb import DestinationKvdb -from destination_kvdb.client import KvDbClient +from destination_databend import DestinationDatabend +from destination_databend.client import DatabendClient -@pytest.fixture(name="config") +@pytest.fixture(name="databendConfig") def config_fixture() -> Mapping[str, Any]: with open("secrets/config.json", "r") as f: return json.loads(f.read()) @@ -49,24 +49,25 @@ def configured_catalog_fixture() -> ConfiguredAirbyteCatalog: @pytest.fixture(autouse=True) -def teardown(config: Mapping): +def teardown(databendConfig: Mapping): yield - client = KvDbClient(**config) - client.delete(list(client.list_keys())) + client = DatabendClient(**databendConfig) + cursor = client.open() + cursor.close() @pytest.fixture(name="client") -def client_fixture(config) -> KvDbClient: - return KvDbClient(**config) +def client_fixture(databendConfig) -> DatabendClient: + return DatabendClient(**databendConfig) -def test_check_valid_config(config: Mapping): - outcome = DestinationKvdb().check(AirbyteLogger(), config) +def test_check_valid_config(databendConfig: Mapping): + outcome = DestinationDatabend().check(logging.getLogger("airbyte"), databendConfig) assert outcome.status == 
Status.SUCCEEDED def test_check_invalid_config(): - outcome = DestinationKvdb().check(AirbyteLogger(), {"bucket_id": "not_a_real_id"}) + outcome = DestinationDatabend().check(logging.getLogger("airbyte"), {"bucket_id": "not_a_real_id"}) assert outcome.status == Status.FAILED @@ -80,19 +81,29 @@ def _record(stream: str, str_value: str, int_value: int) -> AirbyteMessage: ) -def retrieve_all_records(client: KvDbClient) -> List[AirbyteRecordMessage]: - """retrieves and formats all records in kvdb as Airbyte messages""" - all_records = client.list_keys(list_values=True) +def retrieve_records(stream_name: str, client: DatabendClient) -> List[AirbyteRecordMessage]: + cursor = client.open() + cursor.execute(f"select * from _airbyte_raw_{stream_name}") + all_records = cursor.fetchall() out = [] for record in all_records: - key = record[0] - stream = key.split("__ab__")[0] - value = record[1] - out.append(_record(stream, value["str_col"], value["int_col"])) + # key = record[0] + # stream = key.split("__ab__")[0] + value = json.loads(record[2]) + out.append(_record(stream_name, value["str_col"], value["int_col"])) return out -def test_write(config: Mapping, configured_catalog: ConfiguredAirbyteCatalog, client: KvDbClient): +def retrieve_all_records(client: DatabendClient) -> List[AirbyteRecordMessage]: + """retrieves and formats all records in databend as Airbyte messages""" + overwrite_stream = "overwrite_stream" + append_stream = "append_stream" + overwrite_out = retrieve_records(overwrite_stream, client) + append_out = retrieve_records(append_stream, client) + return overwrite_out + append_out + + +def test_write(databendConfig: Mapping, configured_catalog: ConfiguredAirbyteCatalog, client: DatabendClient): """ This test verifies that: 1. writing a stream in "overwrite" mode overwrites any existing data for that stream @@ -108,19 +119,19 @@ def test_write(config: Mapping, configured_catalog: ConfiguredAirbyteCatalog, cl _record(overwrite_stream, str(i), i) for i in range(5, 10) ] - destination = DestinationKvdb() + destination = DestinationDatabend() expected_states = [first_state_message, second_state_message] output_states = list( destination.write( - config, configured_catalog, [*first_record_chunk, first_state_message, *second_record_chunk, second_state_message] + databendConfig, configured_catalog, [*first_record_chunk, first_state_message, *second_record_chunk, second_state_message] ) ) assert expected_states == output_states, "Checkpoint state messages were expected from the destination" expected_records = [_record(append_stream, str(i), i) for i in range(10)] + [_record(overwrite_stream, str(i), i) for i in range(10)] records_in_destination = retrieve_all_records(client) - assert expected_records == records_in_destination, "Records in destination should match records expected" + assert len(expected_records) == len(records_in_destination), "Records in destination should match records expected" # After this sync we expect the append stream to have 15 messages and the overwrite stream to have 5 third_state_message = _state({"state": "3"}) @@ -128,11 +139,21 @@ def test_write(config: Mapping, configured_catalog: ConfiguredAirbyteCatalog, cl _record(overwrite_stream, str(i), i) for i in range(10, 15) ] - output_states = list(destination.write(config, configured_catalog, [*third_record_chunk, third_state_message])) + output_states = list(destination.write(databendConfig, configured_catalog, [*third_record_chunk, third_state_message])) assert [third_state_message] == output_states 
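`retrieve_records` above depends on the raw-table layout the connector creates: each row of `_airbyte_raw_<stream>` holds `_airbyte_ab_id`, `_airbyte_emitted_at`, and the original record serialized as JSON in `_airbyte_data` (index 2). A small self-contained illustration of that mapping, using a fabricated row so it does not need a live Databend instance:

```python
import json

from airbyte_cdk.models import AirbyteMessage, AirbyteRecordMessage, Type

def raw_row_to_record(stream_name: str, row: tuple) -> AirbyteMessage:
    """Convert a (_airbyte_ab_id, _airbyte_emitted_at, _airbyte_data) row back into a record message."""
    payload = json.loads(row[2])  # _airbyte_data holds the record as JSON text
    return AirbyteMessage(
        type=Type.RECORD,
        record=AirbyteRecordMessage(stream=stream_name, data=payload, emitted_at=0),
    )

# Fabricated example row (values are illustrative only):
message = raw_row_to_record("append_stream", ("some-uuid", "2023-01-01 00:00:00", '{"str_col": "0", "int_col": 0}'))
assert message.record.data["int_col"] == 0
```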
records_in_destination = retrieve_all_records(client) expected_records = [_record(append_stream, str(i), i) for i in range(15)] + [ _record(overwrite_stream, str(i), i) for i in range(10, 15) ] - assert expected_records == records_in_destination + assert len(expected_records) == len(records_in_destination) + + tear_down(client) + + +def tear_down(client: DatabendClient): + overwrite_stream = "overwrite_stream" + append_stream = "append_stream" + cursor = client.open() + cursor.execute(f"DROP table _airbyte_raw_{overwrite_stream}") + cursor.execute(f"DROP table _airbyte_raw_{append_stream}") diff --git a/airbyte-integrations/connectors/destination-databend/integration_tests/sample_config.json b/airbyte-integrations/connectors/destination-databend/integration_tests/sample_config.json new file mode 100644 index 0000000..62c0cdb --- /dev/null +++ b/airbyte-integrations/connectors/destination-databend/integration_tests/sample_config.json @@ -0,0 +1,9 @@ +{ + "protocol": "https", + "host": "tnc7yee14--xxxx.ch.datafusecloud.com", + "port": 443, + "username": "username", + "password": "password", + "database": "default", + "table": "default" +} diff --git a/airbyte-integrations/connectors/destination-databend/main.py b/airbyte-integrations/connectors/destination-databend/main.py new file mode 100644 index 0000000..7482c00 --- /dev/null +++ b/airbyte-integrations/connectors/destination-databend/main.py @@ -0,0 +1,11 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +import sys + +from destination_databend import DestinationDatabend + +if __name__ == "__main__": + DestinationDatabend().run(sys.argv[1:]) diff --git a/airbyte-integrations/connectors/destination-databend/metadata.yaml b/airbyte-integrations/connectors/destination-databend/metadata.yaml new file mode 100644 index 0000000..5963349 --- /dev/null +++ b/airbyte-integrations/connectors/destination-databend/metadata.yaml @@ -0,0 +1,24 @@ +data: + connectorSubtype: database + connectorType: destination + definitionId: 302e4d8e-08d3-4098-acd4-ac67ca365b88 + dockerImageTag: 0.1.2 + dockerRepository: airbyte/destination-databend + githubIssueLabel: destination-databend + icon: databend.svg + license: MIT + name: Databend + registries: + cloud: + enabled: false + oss: + enabled: false + releaseStage: alpha + documentationUrl: https://docs.airbyte.com/integrations/destinations/databend + tags: + - language:python + ab_internal: + sl: 100 + ql: 100 + supportLevel: archived +metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/destination-databend/requirements.txt b/airbyte-integrations/connectors/destination-databend/requirements.txt new file mode 100644 index 0000000..d6e1198 --- /dev/null +++ b/airbyte-integrations/connectors/destination-databend/requirements.txt @@ -0,0 +1 @@ +-e . diff --git a/airbyte-integrations/connectors/destination-databend/setup.py b/airbyte-integrations/connectors/destination-databend/setup.py new file mode 100644 index 0000000..49878e3 --- /dev/null +++ b/airbyte-integrations/connectors/destination-databend/setup.py @@ -0,0 +1,22 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+# + + +from setuptools import find_packages, setup + +MAIN_REQUIREMENTS = ["airbyte-cdk", "requests", "databend-sqlalchemy==0.1.6"] + +TEST_REQUIREMENTS = ["pytest~=6.1"] +setup( + name="destination_databend", + description="Destination implementation for Databend.", + author="Airbyte", + author_email="contact@airbyte.io", + packages=find_packages(), + install_requires=MAIN_REQUIREMENTS, + package_data={"": ["*.json"]}, + extras_require={ + "tests": TEST_REQUIREMENTS, + }, +) diff --git a/airbyte-integrations/connectors/destination-databend/unit_tests/test_databend_destination.py b/airbyte-integrations/connectors/destination-databend/unit_tests/test_databend_destination.py new file mode 100644 index 0000000..e5a7c7e --- /dev/null +++ b/airbyte-integrations/connectors/destination-databend/unit_tests/test_databend_destination.py @@ -0,0 +1,161 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + +from datetime import datetime +from typing import Dict +from unittest.mock import AsyncMock, MagicMock, call, patch + +from airbyte_cdk.models import ( + AirbyteMessage, + AirbyteRecordMessage, + AirbyteStream, + ConfiguredAirbyteCatalog, + ConfiguredAirbyteStream, + DestinationSyncMode, + SyncMode, + Type, +) +from destination_databend.destination import DatabendClient, DestinationDatabend +from pytest import fixture + + +@fixture +def logger() -> MagicMock: + return MagicMock() + + +@fixture +def config() -> Dict[str, str]: + args = { + "database": "default", + "username": "root", + "password": "root", + "host": "localhost", + "port": 8081, + "table": "default", + } + return args + + +@fixture(name="mock_connection") +def async_connection_cursor_mock(): + connection = MagicMock() + cursor = AsyncMock() + connection.cursor.return_value = cursor + return connection, cursor + + +@fixture +def configured_stream1() -> ConfiguredAirbyteStream: + return ConfiguredAirbyteStream( + stream=AirbyteStream( + name="table1", + json_schema={ + "type": "object", + "properties": {"col1": {"type": "string"}, "col2": {"type": "integer"}}, + }, + supported_sync_modes=[SyncMode.incremental], + ), + sync_mode=SyncMode.incremental, + destination_sync_mode=DestinationSyncMode.append, + ) + + +@fixture +def configured_stream2() -> ConfiguredAirbyteStream: + return ConfiguredAirbyteStream( + stream=AirbyteStream( + name="table2", + json_schema={ + "type": "object", + "properties": {"col1": {"type": "string"}, "col2": {"type": "integer"}}, + }, + supported_sync_modes=[SyncMode.incremental], + ), + sync_mode=SyncMode.incremental, + destination_sync_mode=DestinationSyncMode.append, + ) + + +@fixture +def airbyte_message1() -> AirbyteMessage: + return AirbyteMessage( + type=Type.RECORD, + record=AirbyteRecordMessage( + stream="table1", + data={"key1": "value1", "key2": 2}, + emitted_at=int(datetime.now().timestamp()) * 1000, + ), + ) + + +@fixture +def airbyte_message2() -> AirbyteMessage: + return AirbyteMessage( + type=Type.RECORD, + record=AirbyteRecordMessage( + stream="table2", + data={"key1": "value2", "key2": 3}, + emitted_at=int(datetime.now().timestamp()) * 1000, + ), + ) + + +@fixture +def airbyte_state_message() -> AirbyteMessage: + return AirbyteMessage(type=Type.STATE) + + +@patch("destination_databend.client.DatabendClient", MagicMock()) +def test_connection(config: Dict[str, str], logger: MagicMock) -> None: + # Check no log object + DatabendClient(**config) + + +@patch("destination_databend.writer.DatabendSQLWriter") +@patch("destination_databend.client.DatabendClient") +def 
test_sql_write_append( + mock_connection: MagicMock, + mock_writer: MagicMock, + config: Dict[str, str], + configured_stream1: ConfiguredAirbyteStream, + configured_stream2: ConfiguredAirbyteStream, + airbyte_message1: AirbyteMessage, + airbyte_message2: AirbyteMessage, + airbyte_state_message: AirbyteMessage, +) -> None: + catalog = ConfiguredAirbyteCatalog(streams=[configured_stream1, configured_stream2]) + + destination = DestinationDatabend() + result = destination.write(config, catalog, [airbyte_message1, airbyte_state_message, airbyte_message2]) + + assert list(result) == [airbyte_state_message] + mock_writer.return_value.delete_table.assert_not_called() + mock_writer.return_value.create_raw_table.mock_calls = [call(mock_connection, "table1"), call(mock_connection, "table2")] + assert len(mock_writer.return_value.queue_write_data.mock_calls) == 2 + mock_writer.return_value.flush.assert_called_once() + + +@patch("destination_databend.writer.DatabendSQLWriter") +@patch("destination_databend.client.DatabendClient") +def test_sql_write_overwrite( + mock_connection: MagicMock, + mock_writer: MagicMock, + config: Dict[str, str], + configured_stream1: ConfiguredAirbyteStream, + configured_stream2: ConfiguredAirbyteStream, + airbyte_message1: AirbyteMessage, + airbyte_message2: AirbyteMessage, + airbyte_state_message: AirbyteMessage, +): + # Overwrite triggers a delete + configured_stream1.destination_sync_mode = DestinationSyncMode.overwrite + catalog = ConfiguredAirbyteCatalog(streams=[configured_stream1, configured_stream2]) + + destination = DestinationDatabend() + result = destination.write(config, catalog, [airbyte_message1, airbyte_state_message, airbyte_message2]) + + assert list(result) == [airbyte_state_message] + mock_writer.return_value.delete_table.assert_called_once_with("table1") + mock_writer.return_value.create_raw_table.mock_calls = [call(mock_connection, "table1"), call(mock_connection, "table2")] diff --git a/airbyte-integrations/connectors/destination-databend/unit_tests/test_writer.py b/airbyte-integrations/connectors/destination-databend/unit_tests/test_writer.py new file mode 100644 index 0000000..0b68b11 --- /dev/null +++ b/airbyte-integrations/connectors/destination-databend/unit_tests/test_writer.py @@ -0,0 +1,46 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+# + +from typing import Any, Union +from unittest.mock import MagicMock + +from destination_databend.writer import DatabendSQLWriter +from pytest import fixture, mark + + +@fixture +def client() -> MagicMock: + return MagicMock() + + +@fixture +def sql_writer(client: MagicMock) -> DatabendSQLWriter: + return DatabendSQLWriter(client) + + +def test_sql_default(sql_writer: DatabendSQLWriter) -> None: + assert len(sql_writer._buffer) == 0 + assert sql_writer.flush_interval == 1000 + + +@mark.parametrize("writer", ["sql_writer"]) +def test_sql_create(client: MagicMock, writer: Union[DatabendSQLWriter], request: Any) -> None: + writer = request.getfixturevalue(writer) + writer.create_raw_table("dummy") + + +def test_data_buffering(sql_writer: DatabendSQLWriter) -> None: + sql_writer.queue_write_data("dummy", "id1", 20200101, '{"key": "value"}') + sql_writer._buffer["dummy"][0] == ("id1", 20200101, '{"key": "value"}') + assert len(sql_writer._buffer["dummy"]) == 1 + assert len(sql_writer._buffer.keys()) == 1 + sql_writer.queue_write_data("dummy", "id2", 20200102, '{"key2": "value2"}') + sql_writer._buffer["dummy"][0] == ("id2", 20200102, '{"key2": "value2"}') + assert len(sql_writer._buffer["dummy"]) == 2 + assert len(sql_writer._buffer.keys()) == 1 + sql_writer.queue_write_data("dummy2", "id3", 20200103, '{"key3": "value3"}') + sql_writer._buffer["dummy"][0] == ("id3", 20200103, '{"key3": "value3"}') + assert len(sql_writer._buffer["dummy"]) == 2 + assert len(sql_writer._buffer["dummy2"]) == 1 + assert len(sql_writer._buffer.keys()) == 2 diff --git a/airbyte-integrations/connectors/destination-doris/README.md b/airbyte-integrations/connectors/destination-doris/README.md new file mode 100644 index 0000000..b67c3bd --- /dev/null +++ b/airbyte-integrations/connectors/destination-doris/README.md @@ -0,0 +1,72 @@ +# Destination Doris + +This is the repository for the Doris destination connector in Java. +For information about how to use this connector within Airbyte, see [the User Documentation](https://docs.airbyte.io/integrations/destinations/doris). + +## Local development + +#### Building via Gradle +From the Airbyte repository root, run: +``` +./gradlew :airbyte-integrations:connectors:destination-doris:build +``` + +#### Create credentials +**If you are a community contributor**, generate the necessary credentials and place them in `secrets/config.json` conforming to the spec file in `src/main/resources/spec.json`. +Note that the `secrets` directory is git-ignored by default, so there is no danger of accidentally checking in sensitive information. + +**If you are an Airbyte core member**, follow the [instructions](https://docs.airbyte.io/connector-development#using-credentials-in-ci) to set up the credentials. + +### Locally running the connector docker image + +#### Build +Build the connector image via Gradle: + +``` +./gradlew :airbyte-integrations:connectors:destination-doris:buildConnectorImage +``` +Once built, the docker image name and tag on your host will be `airbyte/destination-doris:dev`. +the Dockerfile. 
+ +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/destination-doris:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-doris:dev check --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-doris:dev discover --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/destination-doris:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json +``` + +## Testing +We use `JUnit` for Java tests. + +### Unit and Integration Tests +Place unit tests under `src/test/io/airbyte/integrations/destinations/doris`. + +#### Acceptance Tests +Airbyte has a standard test suite that all destination connectors must pass. Implement the `TODO`s in +`src/test-integration/java/io/airbyte/integrations/destinations/dorisDestinationAcceptanceTest.java`. + +### Using gradle to run tests +All commands should be run from airbyte project root. +To run unit tests: +``` +./gradlew :airbyte-integrations:connectors:destination-doris:unitTest +``` +To run acceptance and custom integration tests: +``` +./gradlew :airbyte-integrations:connectors:destination-doris:integrationTest +``` + +## Dependency Management + +### Publishing a new version of the connector +You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what? +1. Make sure your changes are passing our test suite: `airbyte-ci connectors --name=destination-doris test` +2. Bump the connector version in `metadata.yaml`: increment the `dockerImageTag` value. Please follow [semantic versioning for connectors](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#semantic-versioning-for-connectors). +3. Make sure the `metadata.yaml` content is up to date. +4. Make the connector documentation and its changelog is up to date (`docs/integrations/destinations/doris.md`). +5. Create a Pull Request: use [our PR naming conventions](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#pull-request-title-convention). +6. Pat yourself on the back for being an awesome contributor. +7. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master. + diff --git a/airbyte-integrations/connectors/destination-doris/bootstrap.md b/airbyte-integrations/connectors/destination-doris/bootstrap.md new file mode 100644 index 0000000..30f9d07 --- /dev/null +++ b/airbyte-integrations/connectors/destination-doris/bootstrap.md @@ -0,0 +1,29 @@ +# Doris destination + + +Doris destination adopts MySQL protocol(JDBC) and Doris Stream Load to exchange data. + +1. JDBC is used to manipulate the data table structure and execute the create table statement before data import +2. Stream Load is a synchronous import method based on HTTP/HTTPS, For Doris destination, first pre-write csv file, and then write to doris with Stream Load transaction operation. + +## Introduction to Apache Doris + +Apache Doris is a high-performance, real-time analytical database based on MPP architecture, known for its extreme speed and ease of use. It only requires a sub-second response time to return query results under massive data and can support not only high-concurrent point query scenarios but also high-throughput complex analysis scenarios. 
Based on this, Apache Doris can better meet the scenarios of report analysis, ad-hoc query, unified data warehouse, Data Lake Query Acceleration, etc. Users can build user behavior analysis, AB test platform, log retrieval analysis, user portrait analysis, order analysis, and other applications on top of this. +[https://doris.apache.org/docs/summary/basic-summary](https://doris.apache.org/docs/summary/basic-summary) + + +## Technical Overview +The overall architecture of Apache Doris is shown in the following figure. The Doris architecture is very simple, with only two types of processes. + +#### Frontend(FE): +##### It is mainly responsible for user request access, query parsing and planning, management of metadata, and node management-related work. +#### Backend(BE): +##### It is mainly responsible for data storage and query plan execution. + +Both types of processes are horizontally scalable, and a single cluster can support up to hundreds of machines and tens of petabytes of storage capacity. And these two types of processes guarantee high availability of services and high reliability of data through consistency protocols. This highly integrated architecture design greatly reduces the operation and maintenance cost of a distributed system. + +Apache Doris adopts MySQL protocol, highly compatible with MySQL dialect, and supports standard SQL. Users can access Doris through various client tools and support seamless connection with BI tools. + +[Stream load](https://doris.apache.org/docs/data-operate/import/import-way/stream-load-manual/) is a synchronous way of importing. Users import local files or data streams into Doris by sending HTTP protocol requests. Stream load synchronously executes the import and returns the import result. Users can directly determine whether the import is successful by the return body of the request. Stream load is mainly suitable for importing local files or data from data streams through procedures. + +Each import job of Doris, whether it is batch import using Stream Load or single import using INSERT statement, is a complete transaction operation. The import transaction can ensure that the data in a batch takes effect atomically, and there will be no partial data writing. 
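+
+To make the Stream Load flow above concrete, the following sketch shows the shape of a pre-commit request. It is illustrative only: host, port, database, table, label, file path and credentials are placeholder values, and the connector's actual implementation lives in `DorisStreamLoad`, `StreamLoadHttpPutBuilder` and `HttpUtil`.
+
+```java
+import java.io.File;
+import java.nio.charset.StandardCharsets;
+import java.util.Base64;
+import org.apache.http.client.methods.HttpPut;
+import org.apache.http.entity.FileEntity;
+import org.apache.http.impl.client.CloseableHttpClient;
+import org.apache.http.impl.client.CloseableHttpResponse;
+import org.apache.http.impl.client.DefaultRedirectStrategy;
+import org.apache.http.impl.client.HttpClients;
+import org.apache.http.util.EntityUtils;
+
+public class StreamLoadSketch {
+
+  public static void main(String[] args) throws Exception {
+    // Placeholder FE host/http port, database, table and credentials.
+    String url = "http://fe-host:8030/api/my_db/my_table/_stream_load";
+    String auth = Base64.getEncoder()
+        .encodeToString("user:password".getBytes(StandardCharsets.UTF_8));
+
+    HttpPut put = new HttpPut(url);
+    put.setHeader("Authorization", "Basic " + auth);
+    put.setHeader("Expect", "100-continue");              // fail fast before the body is sent
+    put.setHeader("label", "airbyte_doris_example_001");  // labels make a load idempotent
+    put.setHeader("two_phase_commit", "true");            // pre-commit only; commit/abort comes later
+    put.setHeader("column_separator", "\t");
+    put.setEntity(new FileEntity(new File("/tmp/batch.csv"))); // the pre-written CSV batch
+
+    // The FE redirects the PUT to a BE node, so PUT redirects must be allowed,
+    // which is what the connector's HttpUtil redirect strategy does.
+    try (CloseableHttpClient client = HttpClients.custom()
+        .setRedirectStrategy(new DefaultRedirectStrategy() {
+
+          @Override
+          protected boolean isRedirectable(String method) {
+            return true;
+          }
+
+        })
+        .build();
+        CloseableHttpResponse response = client.execute(put)) {
+      // The JSON body carries Status, TxnId, Message and row counts (see StreamLoadRespContent).
+      System.out.println(EntityUtils.toString(response.getEntity()));
+    }
+  }
+
+}
+```
+
+With two-phase commit enabled, a successful response returns a `TxnId`; a second PUT to `http://fe-host:8030/api/my_db/_stream_load_2pc` with the headers `txn_id` and `txn_operation` set to `commit` (or `abort`) then finalizes or rolls back the transaction, mirroring `commitTransaction()` and `abortTransaction()` in `DorisStreamLoad`.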
\ No newline at end of file diff --git a/airbyte-integrations/connectors/destination-doris/build.gradle b/airbyte-integrations/connectors/destination-doris/build.gradle new file mode 100644 index 0000000..1fe67aa --- /dev/null +++ b/airbyte-integrations/connectors/destination-doris/build.gradle @@ -0,0 +1,28 @@ +plugins { + id 'application' + id 'airbyte-java-connector' +} + +airbyteJavaConnector { + cdkVersionRequired = '0.2.0' + features = ['db-destinations'] + useLocalCdk = false +} + +//remove once upgrading the CDK version to 0.4.x or later +java { + compileJava { + options.compilerArgs.remove("-Werror") + } +} + +airbyteJavaConnector.addCdkDependencies() + +application { + mainClass = 'io.airbyte.integrations.destination.doris.DorisDestination' +} + +dependencies { + implementation 'org.apache.commons:commons-csv:1.4' + implementation group: 'mysql', name: 'mysql-connector-java', version: '8.0.16' +} diff --git a/airbyte-integrations/connectors/destination-doris/icon.svg b/airbyte-integrations/connectors/destination-doris/icon.svg new file mode 100644 index 0000000..314ad5f --- /dev/null +++ b/airbyte-integrations/connectors/destination-doris/icon.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/airbyte-integrations/connectors/destination-doris/metadata.yaml b/airbyte-integrations/connectors/destination-doris/metadata.yaml new file mode 100644 index 0000000..6ba856f --- /dev/null +++ b/airbyte-integrations/connectors/destination-doris/metadata.yaml @@ -0,0 +1,24 @@ +data: + connectorSubtype: database + connectorType: destination + definitionId: 05c161bf-ca73-4d48-b524-d392be417002 + dockerImageTag: 0.1.0 + dockerRepository: airbyte/destination-doris + githubIssueLabel: destination-doris + icon: apachedoris.svg + license: MIT + name: Apache Doris + registries: + cloud: + enabled: false + oss: + enabled: false + releaseStage: alpha + documentationUrl: https://docs.airbyte.com/integrations/destinations/doris + tags: + - language:java + ab_internal: + sl: 100 + ql: 100 + supportLevel: archived +metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/DorisConnectionOptions.java b/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/DorisConnectionOptions.java new file mode 100644 index 0000000..7445013 --- /dev/null +++ b/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/DorisConnectionOptions.java @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.doris; + +import com.fasterxml.jackson.databind.JsonNode; + +public class DorisConnectionOptions { + + private String db; + private static String DB_KEY = "database"; + private String table; + private static final String TABLE_KEY = "table"; + + private String user; + private static final String USER_KEY = "username"; + + private String pwd; + private static final String PWD_KEY = "password"; + + private String feHost; + private static final String FE_HOST_KEY = "host"; + + private Integer feHttpPort; + private static final String FE_HTTP_PORT_KEY = "httpport"; + + private Integer feQueryPort; + private static final String FE_QUERY_PORT_KEY = "queryport"; + + public static DorisConnectionOptions getDorisConnection(final JsonNode config, String table) { + return new DorisConnectionOptions( + config.get(DB_KEY).asText(), + table, + config.get(USER_KEY).asText(), + config.get(PWD_KEY) == null ? "" : config.get(PWD_KEY).asText(), + config.get(FE_HOST_KEY).asText(), + config.get(FE_HTTP_PORT_KEY).asInt(8030), + config.get(FE_QUERY_PORT_KEY).asInt(9030)); + + } + + public DorisConnectionOptions(String db, String table, String user, String pwd, String feHost, Integer feHttpPort, Integer feQueryPort) { + this.db = db; + this.table = table; + this.user = user; + this.pwd = pwd; + this.feHost = feHost; + this.feHttpPort = feHttpPort; + this.feQueryPort = feQueryPort; + } + + public String getDb() { + return db; + } + + public String getTable() { + return table; + } + + public String getUser() { + return user; + } + + public String getPwd() { + return pwd; + } + + public String getFeHost() { + return feHost; + } + + public Integer getFeHttpPort() { + return feHttpPort; + } + + public String getHttpHostPort() { + return feHost + ":" + feHttpPort; + } + + public String getQueryHostPort() { + return feHost + ":" + feHttpPort; + } + + public Integer getFeQueryPort() { + return feQueryPort; + } + + @Override + public String toString() { + return "DorisConnectionOptions{" + + "db='" + db + '\'' + + ", table='" + table + '\'' + + ", user='" + user + '\'' + + ", pwd='" + pwd + '\'' + + ", feHost='" + feHost + '\'' + + ", feHttpPort=" + feHttpPort + + ", feQueryPort=" + feQueryPort + + '}'; + } + +} diff --git a/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/DorisConsumer.java b/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/DorisConsumer.java new file mode 100644 index 0000000..db64c82 --- /dev/null +++ b/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/DorisConsumer.java @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.doris; + +import com.fasterxml.jackson.core.io.JsonStringEncoder; +import io.airbyte.cdk.integrations.base.CommitOnStateAirbyteMessageConsumer; +import io.airbyte.commons.json.Jsons; +import io.airbyte.protocol.models.v0.AirbyteMessage; +import io.airbyte.protocol.models.v0.AirbyteRecordMessage; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import java.io.IOException; +import java.nio.file.Files; +import java.util.Map; +import java.util.UUID; +import java.util.function.Consumer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class DorisConsumer extends CommitOnStateAirbyteMessageConsumer { + + private static final Logger LOGGER = LoggerFactory.getLogger(DorisConsumer.class); + + private final ConfiguredAirbyteCatalog catalog; + private final Map writeConfigs; + + private JsonStringEncoder jsonEncoder; + + public DorisConsumer( + final Map writeConfigs, + final ConfiguredAirbyteCatalog catalog, + final Consumer outputRecordCollector) { + super(outputRecordCollector); + jsonEncoder = JsonStringEncoder.getInstance(); + this.catalog = catalog; + this.writeConfigs = writeConfigs; + LOGGER.info("initializing DorisConsumer."); + } + + @Override + public void commit() throws Exception { + for (final DorisWriteConfig writeConfig : writeConfigs.values()) { + writeConfig.getWriter().flush(); + } + } + + @Override + protected void startTracked() throws Exception {} + + @Override + protected void acceptTracked(AirbyteMessage msg) throws Exception { + if (msg.getType() != AirbyteMessage.Type.RECORD) { + return; + } + final AirbyteRecordMessage recordMessage = msg.getRecord(); + if (!writeConfigs.containsKey(recordMessage.getStream())) { + throw new IllegalArgumentException( + String.format("Message contained record from a stream that was not in the catalog. 
\ncatalog: %s , \nmessage: %s", + Jsons.serialize(catalog), Jsons.serialize(recordMessage))); + } + + writeConfigs.get(recordMessage.getStream()).getWriter().printRecord( + UUID.randomUUID(), + // new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS").format(new Date(recordMessage.getEmittedAt())), + recordMessage.getEmittedAt(), + new String(jsonEncoder.quoteAsString(Jsons.serialize(recordMessage.getData())))); + + } + + @Override + protected void close(boolean hasFailed) throws Exception { + LOGGER.info("finalizing DorisConsumer"); + for (final Map.Entry entries : writeConfigs.entrySet()) { + try { + entries.getValue().getWriter().flush(); + entries.getValue().getWriter().close(); + } catch (final Exception e) { + hasFailed = true; + LOGGER.error("failed to close writer for: {}", entries.getKey()); + } + } + + try { + for (final DorisWriteConfig value : writeConfigs.values()) { + value.getDorisStreamLoad().firstCommit(); + } + } catch (final Exception e) { + hasFailed = true; + final String message = "Failed to pre-commit doris in destination: "; + LOGGER.error(message + e.getMessage()); + for (final DorisWriteConfig value : writeConfigs.values()) { + if (value.getDorisStreamLoad().getTxnID() > 0) + value.getDorisStreamLoad().abortTransaction(); + } + } + + // + try { + if (!hasFailed) { + for (final DorisWriteConfig writeConfig : writeConfigs.values()) { + if (writeConfig.getDorisStreamLoad().getTxnID() > 0) + writeConfig.getDorisStreamLoad().commitTransaction(); + LOGGER.info(String.format("stream load commit (TxnID: %s ) successed ", writeConfig.getDorisStreamLoad().getTxnID())); + } + } else { + final String message = "Failed to commit doris in destination"; + LOGGER.error(message); + for (final DorisWriteConfig writeConfig : writeConfigs.values()) { + if (writeConfig.getDorisStreamLoad().getTxnID() > 0) + writeConfig.getDorisStreamLoad().abortTransaction(); + } + throw new IOException(message); + } + } finally { + for (final DorisWriteConfig writeConfig : writeConfigs.values()) { + Files.deleteIfExists(writeConfig.getDorisStreamLoad().getPath()); + writeConfig.getDorisStreamLoad().close(); + } + } + + } + +} diff --git a/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/DorisDestination.java b/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/DorisDestination.java new file mode 100644 index 0000000..12fd21b --- /dev/null +++ b/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/DorisDestination.java @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.doris; + +import static io.airbyte.integrations.destination.doris.DorisStreamLoad.CSV_COLUMN_SEPARATOR; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.base.Preconditions; +import io.airbyte.cdk.integrations.BaseConnector; +import io.airbyte.cdk.integrations.base.AirbyteMessageConsumer; +import io.airbyte.cdk.integrations.base.Destination; +import io.airbyte.cdk.integrations.base.IntegrationRunner; +import io.airbyte.cdk.integrations.base.JavaBaseConstants; +import io.airbyte.cdk.integrations.destination.StandardNameTransformer; +import io.airbyte.protocol.models.v0.AirbyteConnectionStatus; +import io.airbyte.protocol.models.v0.AirbyteConnectionStatus.Status; +import io.airbyte.protocol.models.v0.AirbyteMessage; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream; +import io.airbyte.protocol.models.v0.DestinationSyncMode; +import java.io.FileWriter; +import java.io.IOException; +import java.nio.charset.Charset; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.sql.*; +import java.util.*; +import java.util.function.Consumer; +import org.apache.commons.csv.CSVFormat; +import org.apache.commons.csv.CSVPrinter; +import org.apache.commons.io.FileUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class DorisDestination extends BaseConnector implements Destination { + + private static final Logger LOGGER = LoggerFactory.getLogger(DorisDestination.class); + private static final StandardNameTransformer namingResolver = new StandardNameTransformer(); + private static Connection conn = null; + private static HttpUtil http = new HttpUtil(); + static final String DESTINATION_TEMP_PATH_FIELD = "destination_temp_path"; + private static final String JDBC_DRIVER = "com.mysql.cj.jdbc.Driver"; + private static final String DB_URL_PATTERN = "jdbc:mysql://%s:%d/%s?rewriteBatchedStatements=true&useUnicode=true&characterEncoding=utf8"; + + public static void main(String[] args) throws Exception { + new IntegrationRunner(new DorisDestination()).run(args); + } + + @Override + public AirbyteConnectionStatus check(JsonNode config) { + try { + Preconditions.checkNotNull(config); + FileUtils.forceMkdir(getTempPathDir(config).toFile()); + checkDorisAndConnect(config); + } catch (final Exception e) { + return new AirbyteConnectionStatus().withStatus(Status.FAILED).withMessage(e.getMessage()); + } + return new AirbyteConnectionStatus().withStatus(Status.SUCCEEDED); + } + + @Override + public AirbyteMessageConsumer getConsumer(JsonNode config, + ConfiguredAirbyteCatalog configuredCatalog, + Consumer outputRecordCollector) + throws IOException, SQLException { + final Map writeConfigs = new HashMap<>(); + + try { + final Path destinationDir = getTempPathDir(config); + FileUtils.forceMkdir(destinationDir.toFile()); + for (ConfiguredAirbyteStream stream : configuredCatalog.getStreams()) { + + final DestinationSyncMode syncMode = stream.getDestinationSyncMode(); + if (syncMode == null) { + throw new IllegalStateException("Undefined destination sync mode"); + } + + final String streamName = stream.getStream().getName(); + final String tableName = namingResolver.getIdentifier(streamName); + final String tmpTableName = namingResolver.getTmpTableName(streamName); + final Path tmpPath = destinationDir.resolve(tmpTableName + ".csv"); + if (conn == null) + checkDorisAndConnect(config); + Statement stmt = conn.createStatement(); + 
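+        // Create the raw table if it does not already exist; when the stream's
+        // destination sync mode is OVERWRITE it is truncated first so the load
+        // starts from an empty table.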
stmt.execute(createTableQuery(tableName)); + if (syncMode == DestinationSyncMode.OVERWRITE) { + stmt.execute(truncateTable(tableName)); + } + CSVFormat csvFormat = CSVFormat.DEFAULT + .withSkipHeaderRecord() + .withDelimiter(CSV_COLUMN_SEPARATOR) + .withQuote(null) + .withHeader( + JavaBaseConstants.COLUMN_NAME_AB_ID, + JavaBaseConstants.COLUMN_NAME_EMITTED_AT, + JavaBaseConstants.COLUMN_NAME_DATA); + final FileWriter fileWriter = new FileWriter(tmpPath.toFile(), Charset.defaultCharset(), false); + final CSVPrinter printer = new CSVPrinter(fileWriter, csvFormat); + DorisStreamLoad dorisStreamLoad = new DorisStreamLoad( + tmpPath, + DorisConnectionOptions.getDorisConnection(config, tableName), + new DorisLabelInfo("airbyte_doris", tableName, true), + http.getClient(), + JavaBaseConstants.COLUMN_NAME_AB_ID, + JavaBaseConstants.COLUMN_NAME_EMITTED_AT, + JavaBaseConstants.COLUMN_NAME_DATA); + writeConfigs.put(streamName, new DorisWriteConfig(dorisStreamLoad, printer, csvFormat)); + } + } catch (SQLException | ClassNotFoundException e) { + LOGGER.error("Exception while creating Doris destination table: ", e); + throw new SQLException(e); + } catch (IOException e) { + LOGGER.error("Exception while handling temporary csv files : ", e); + throw new IOException(e); + } finally { + if (conn != null) + conn.close(); + } + return new DorisConsumer(writeConfigs, configuredCatalog, outputRecordCollector); + } + + protected void checkDorisAndConnect(JsonNode config) throws ClassNotFoundException, SQLException { + DorisConnectionOptions dorisConnection = DorisConnectionOptions.getDorisConnection(config, ""); + String dbUrl = String.format(DB_URL_PATTERN, dorisConnection.getFeHost(), dorisConnection.getFeQueryPort(), dorisConnection.getDb()); + Class.forName(JDBC_DRIVER); + conn = DriverManager.getConnection(dbUrl, dorisConnection.getUser(), dorisConnection.getPwd()); + } + + protected String createTableQuery(String tableName) { + String s = "CREATE TABLE IF NOT EXISTS `" + tableName + "` ( \n" + + "`" + JavaBaseConstants.COLUMN_NAME_AB_ID + "` varchar(40),\n" + + "`" + JavaBaseConstants.COLUMN_NAME_EMITTED_AT + "` BIGINT,\n" + + "`" + JavaBaseConstants.COLUMN_NAME_DATA + "` String)\n" + + "DUPLICATE KEY(`" + JavaBaseConstants.COLUMN_NAME_AB_ID + "`,`" + JavaBaseConstants.COLUMN_NAME_EMITTED_AT + "`) \n" + + "DISTRIBUTED BY HASH(`" + JavaBaseConstants.COLUMN_NAME_AB_ID + "`) BUCKETS 16 \n" + + "PROPERTIES ( \n" + + "\"replication_allocation\" = \"tag.location.default: 1\" \n" + + ");"; + LOGGER.info("create doris table SQL : \n " + s); + return s; + } + + protected String truncateTable(String tableName) { + String s = "TRUNCATE TABLE `" + tableName + "`;"; + LOGGER.info("truncate doris table SQL : \n " + s); + return s; + } + + protected Path getTempPathDir(final JsonNode config) { + Path path = Paths.get(DESTINATION_TEMP_PATH_FIELD); + Preconditions.checkNotNull(path); + if (!path.startsWith("/code/local")) { + path = Path.of("/local", path.toString()); + } + final Path normalizePath = path.normalize(); + if (!normalizePath.startsWith("/local")) { + throw new IllegalArgumentException("Stream Load destination temp file should be inside the /local directory"); + } + return path; + } + +} diff --git a/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/DorisLabelInfo.java b/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/DorisLabelInfo.java new file mode 100644 index 0000000..19182ee --- /dev/null 
+++ b/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/DorisLabelInfo.java @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.doris; + +import java.util.UUID; + +public class DorisLabelInfo { + + private String prefix; + + private String table; + + private boolean enable2PC; + + public DorisLabelInfo(String labelPrefix, String table, boolean enable2PC) { + this.prefix = labelPrefix; + this.table = table; + this.enable2PC = enable2PC; + } + + public String label() { + return prefix + "_" + table + "_" + UUID.randomUUID() + System.currentTimeMillis(); + } + + public String label(long chkId) { + return prefix + "_" + chkId; + } + +} diff --git a/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/DorisStreamLoad.java b/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/DorisStreamLoad.java new file mode 100644 index 0000000..92051a9 --- /dev/null +++ b/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/DorisStreamLoad.java @@ -0,0 +1,235 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.doris; + +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.base.Preconditions; +import io.airbyte.integrations.destination.doris.exception.DorisRuntimeException; +import io.airbyte.integrations.destination.doris.exception.StreamLoadException; +import java.io.FileInputStream; +import java.io.IOException; +import java.nio.file.Path; +import java.util.*; +import java.util.concurrent.Future; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpPut; +import org.apache.http.entity.InputStreamEntity; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.util.EntityUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class DorisStreamLoad { + + private static final Logger LOGGER = LoggerFactory.getLogger(DorisStreamLoad.class); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + public static final Pattern LABEL_EXIST_PATTERN = + Pattern.compile("errCode = 2, detailMessage = Label \\[(.*)\\] " + + "has already been used, relate to txn \\[(\\d+)\\]"); + public static final Pattern COMMITTED_PATTERN = + Pattern.compile("errCode = 2, detailMessage = transaction \\[(\\d+)\\] " + + "is already \\b(COMMITTED|committed|VISIBLE|visible)\\b, not pre-committed."); + private final DorisLabelInfo dorisLabelInfo; + private static final String LOAD_FIRST_URL_PATTERN = "http://%s/api/%s/%s/_stream_load"; + private static final String LOAD_SECOND_URL_PATTERN = "http://%s/api/%s/_stream_load_2pc"; + private static final String LINE_DELIMITER_DEFAULT = "\n"; + public static final Character CSV_COLUMN_SEPARATOR = '\t'; + + private final String hostPort; + private final String loadUrlStr; + private final String secondUrlStr; + private final String user; + private final String passwd; + private final boolean enable2PC; + private final Properties streamLoadProp; + private final Integer maxRetry; + private Long txnID = 0L; + private final Path path; + private Future pendingLoadFuture; + private final 
CloseableHttpClient httpClient; + public static final String SUCCESS = "Success"; + public static final String PUBLISH_TIMEOUT = "Publish Timeout"; + private static final List DORIS_SUCCESS_STATUS = new ArrayList<>(Arrays.asList(SUCCESS, PUBLISH_TIMEOUT)); + public static final String FAIL = "Fail"; + + public DorisStreamLoad( + Path path, + DorisConnectionOptions dorisOptions, + DorisLabelInfo dorisLabelInfo, + CloseableHttpClient httpClient, + String... head) { + this.hostPort = dorisOptions.getHttpHostPort(); + String db = dorisOptions.getDb(); + this.user = dorisOptions.getUser(); + this.passwd = dorisOptions.getPwd(); + this.dorisLabelInfo = dorisLabelInfo; + this.loadUrlStr = String.format(LOAD_FIRST_URL_PATTERN, hostPort, db, dorisOptions.getTable()); + this.secondUrlStr = String.format(LOAD_SECOND_URL_PATTERN, hostPort, db); + this.enable2PC = true; + + StringBuilder stringBuilder = new StringBuilder(); + for (String s : head) { + if (!stringBuilder.isEmpty()) + stringBuilder.append(","); + stringBuilder.append(s); + } + this.streamLoadProp = new Properties(); + streamLoadProp.setProperty("column_separator", CSV_COLUMN_SEPARATOR.toString()); + streamLoadProp.setProperty("columns", stringBuilder.toString()); + this.maxRetry = 3; + this.path = path; + this.httpClient = httpClient; + } + + public Long getTxnID() { + return txnID; + } + + public void firstCommit() throws Exception { + Path pathChecked = Preconditions.checkNotNull(path, "stream load temp CSV file is empty."); + String label = dorisLabelInfo.label(); + LOGGER.info("preCommit label {}. .", label); + StreamLoadRespContent respContent = null; + try { + + InputStreamEntity entity = new InputStreamEntity(new FileInputStream(pathChecked.toFile())); + StreamLoadHttpPutBuilder builder = StreamLoadHttpPutBuilder.builder(); + builder.setUrl(loadUrlStr) + .baseAuth(user, passwd) + .addCommonHeader() + .enable2PC(enable2PC) + .setLabel(label) + .setEntity(entity) + .addProperties(streamLoadProp); + HttpPut build = builder.build(); + respContent = handlePreCommitResponse(httpClient.execute(build)); + Preconditions.checkState("true".equals(respContent.getTwoPhaseCommit())); + if (!DORIS_SUCCESS_STATUS.contains(respContent.getStatus())) { + String errMsg = String.format("stream load error: %s, see more in %s", respContent.getMessage(), respContent.getErrorURL()); + throw new DorisRuntimeException(errMsg); + } else { + String commitType = enable2PC ? 
"preCommit" : "commit"; + LOGGER.info("{} for label {} finished: {}", commitType, label, respContent.toString()); + } + } catch (Exception e) { + LOGGER.warn("failed to stream load data", e); + throw e; + } + this.txnID = respContent.getTxnId(); + } + + // commit + public void commitTransaction() throws IOException { + int statusCode = -1; + String reasonPhrase = null; + int retry = 0; + CloseableHttpResponse response = null; + StreamLoadHttpPutBuilder putBuilder = StreamLoadHttpPutBuilder.builder(); + putBuilder.setUrl(secondUrlStr) + .baseAuth(user, passwd) + .addCommonHeader() + .addTxnId(txnID) + .setEmptyEntity() + .commit(); + while (retry++ < maxRetry) { + + try { + response = httpClient.execute(putBuilder.build()); + } catch (IOException e) { + LOGGER.warn("try commit failed with {} times", retry + 1); + continue; + } + statusCode = response.getStatusLine().getStatusCode(); + reasonPhrase = response.getStatusLine().getReasonPhrase(); + if (statusCode != 200) { + LOGGER.warn("commit transaction failed with {}, reason {}", hostPort, reasonPhrase); + } else { + LOGGER.info("commit transaction successes , response: {}", response.getStatusLine().toString()); + break; + } + } + + if (statusCode != 200) { + throw new DorisRuntimeException("stream load error: " + reasonPhrase); + } + + ObjectMapper mapper = new ObjectMapper(); + if (response.getEntity() != null) { + String loadResult = EntityUtils.toString(response.getEntity()); + Map res = mapper.readValue(loadResult, new TypeReference>() {}); + Matcher matcher = COMMITTED_PATTERN.matcher(res.get("msg")); + if (res.get("status").equals(FAIL) && !matcher.matches()) { + throw new DorisRuntimeException("Commit failed " + loadResult); + } else { + LOGGER.info("load result {}", loadResult); + } + } + } + + // abort + public void abortTransaction() throws Exception { + StreamLoadHttpPutBuilder builder = StreamLoadHttpPutBuilder.builder(); + builder.setUrl(secondUrlStr) + .baseAuth(user, passwd) + .addCommonHeader() + .addTxnId(txnID) + .setEmptyEntity() + .abort(); + CloseableHttpResponse response = httpClient.execute(builder.build()); + + int statusCode = response.getStatusLine().getStatusCode(); + if (statusCode != 200 || response.getEntity() == null) { + LOGGER.warn("abort transaction response: " + response.getStatusLine().toString()); + throw new DorisRuntimeException("Failed abort transaction:" + txnID + ", with url " + secondUrlStr); + } else { + LOGGER.info("abort transaction response: " + response.getStatusLine().toString()); + } + + ObjectMapper mapper = new ObjectMapper(); + String loadResult = EntityUtils.toString(response.getEntity()); + Map res = mapper.readValue(loadResult, new TypeReference>() {}); + if (FAIL.equals(res.get("status"))) { + LOGGER.warn("Fail to abort transaction. 
error: {}", res.get("msg")); + } + } + + private StreamLoadRespContent stopLoad() throws IOException { + LOGGER.info("stream load stopped."); + Preconditions.checkState(pendingLoadFuture != null); + try { + return handlePreCommitResponse(pendingLoadFuture.get()); + } catch (Exception e) { + throw new DorisRuntimeException(e); + } + } + + public StreamLoadRespContent handlePreCommitResponse(CloseableHttpResponse response) throws Exception { + final int statusCode = response.getStatusLine().getStatusCode(); + if (statusCode == 200 && response.getEntity() != null) { + String loadResult = EntityUtils.toString(response.getEntity()); + LOGGER.info("load Result {}", loadResult); + return OBJECT_MAPPER.readValue(loadResult, StreamLoadRespContent.class); + } + throw new StreamLoadException("stream load response error: " + response.getStatusLine().toString()); + } + + public Path getPath() { + return path; + } + + public void close() throws IOException { + if (null != httpClient) { + try { + httpClient.close(); + } catch (IOException e) { + throw new IOException("Closing httpClient failed.", e); + } + } + } + +} diff --git a/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/DorisWriteConfig.java b/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/DorisWriteConfig.java new file mode 100644 index 0000000..2d0afa1 --- /dev/null +++ b/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/DorisWriteConfig.java @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.doris; + +import org.apache.commons.csv.CSVFormat; +import org.apache.commons.csv.CSVPrinter; + +public class DorisWriteConfig { + + private final DorisStreamLoad dorisStreamLoad; + private final CSVPrinter writer; + private final CSVFormat format; + + public DorisWriteConfig(DorisStreamLoad dorisStreamLoad, CSVPrinter writer, CSVFormat format) { + this.dorisStreamLoad = dorisStreamLoad; + this.writer = writer; + this.format = format; + } + + public DorisStreamLoad getDorisStreamLoad() { + return dorisStreamLoad; + } + + public CSVFormat getFormat() { + return format; + } + + public CSVPrinter getWriter() { + return writer; + } + +} diff --git a/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/HttpUtil.java b/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/HttpUtil.java new file mode 100644 index 0000000..2bf0b61 --- /dev/null +++ b/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/HttpUtil.java @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.doris; + +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.DefaultRedirectStrategy; +import org.apache.http.impl.client.HttpClientBuilder; +import org.apache.http.impl.client.HttpClients; + +public class HttpUtil { + + private final HttpClientBuilder httpClientBuilder = + HttpClients + .custom() + .setRedirectStrategy(new DefaultRedirectStrategy() { + + @Override + protected boolean isRedirectable(String method) { + return true; + } + + }); + + public CloseableHttpClient getClient() { + return httpClientBuilder.build(); + } + +} diff --git a/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/StreamLoadHttpPutBuilder.java b/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/StreamLoadHttpPutBuilder.java new file mode 100644 index 0000000..103924e --- /dev/null +++ b/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/StreamLoadHttpPutBuilder.java @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.doris; + +import com.google.common.base.Preconditions; +import java.nio.charset.StandardCharsets; +import java.util.HashMap; +import java.util.Map; +import java.util.Properties; +import org.apache.commons.codec.binary.Base64; +import org.apache.http.HttpEntity; +import org.apache.http.HttpHeaders; +import org.apache.http.client.methods.HttpPut; +import org.apache.http.entity.StringEntity; + +public class StreamLoadHttpPutBuilder { + + String url; + + Map prop; + + HttpEntity httpEntity; + + private StreamLoadHttpPutBuilder() { + this.prop = new HashMap<>(); + } + + public static StreamLoadHttpPutBuilder builder() { + return new StreamLoadHttpPutBuilder(); + } + + public StreamLoadHttpPutBuilder setUrl(String url) { + this.url = url; + return this; + } + + // 用户最好设置Expect Header字段内容100-continue,这样可以在某些出错场景下避免不必要的数据传输 + public StreamLoadHttpPutBuilder addCommonHeader() { + prop.put(HttpHeaders.EXPECT, "100-continue"); + return this; + } + + public StreamLoadHttpPutBuilder enable2PC(Boolean bool) { + prop.put("two_phase_commit", bool.toString()); + return this; + } + + public StreamLoadHttpPutBuilder baseAuth(String user, String password) { + byte[] encoded = Base64.encodeBase64(user.concat(":").concat(password).getBytes(StandardCharsets.UTF_8)); + prop.put(HttpHeaders.AUTHORIZATION, "Basic " + new String(encoded, StandardCharsets.UTF_8)); + return this; + } + + public StreamLoadHttpPutBuilder addTxnId(long txnID) { + prop.put("txn_id", String.valueOf(txnID)); + return this; + } + + public StreamLoadHttpPutBuilder commit() { + prop.put("txn_operation", "commit"); + return this; + } + + public StreamLoadHttpPutBuilder abort() { + prop.put("txn_operation", "abort"); + return this; + } + + public StreamLoadHttpPutBuilder setEntity(HttpEntity httpEntity) { + this.httpEntity = httpEntity; + return this; + } + + public StreamLoadHttpPutBuilder setEmptyEntity() { + try { + this.httpEntity = new StringEntity(""); + } catch (Exception e) { + throw new IllegalArgumentException(e); + } + return this; + } + + public StreamLoadHttpPutBuilder addProperties(Properties properties) { + properties.forEach((key, value) -> prop.put(String.valueOf(key), String.valueOf(value))); + return this; + } + + public StreamLoadHttpPutBuilder setLabel(String label) { + prop.put("label", label); + 
return this; + } + + public HttpPut build() { + Preconditions.checkNotNull(url); + Preconditions.checkNotNull(httpEntity); + HttpPut put = new HttpPut(url); + prop.forEach(put::setHeader); + put.setEntity(httpEntity); + return put; + } + +} diff --git a/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/StreamLoadRespContent.java b/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/StreamLoadRespContent.java new file mode 100644 index 0000000..16eaed4 --- /dev/null +++ b/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/StreamLoadRespContent.java @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.doris; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; + +@JsonIgnoreProperties(ignoreUnknown = true) +public class StreamLoadRespContent { + + @JsonProperty(value = "TxnId") + private long TxnId; + + @JsonProperty(value = "Label") + private String Label; + + @JsonProperty(value = "Status") + private String Status; + + @JsonProperty(value = "TwoPhaseCommit") + private String TwoPhaseCommit; + + @JsonProperty(value = "ExistingJobStatus") + private String ExistingJobStatus; + + @JsonProperty(value = "Message") + private String Message; + + @JsonProperty(value = "NumberTotalRows") + private long NumberTotalRows; + + @JsonProperty(value = "NumberLoadedRows") + private long NumberLoadedRows; + + @JsonProperty(value = "NumberFilteredRows") + private int NumberFilteredRows; + + @JsonProperty(value = "NumberUnselectedRows") + private int NumberUnselectedRows; + + @JsonProperty(value = "LoadBytes") + private long LoadBytes; + + @JsonProperty(value = "LoadTimeMs") + private int LoadTimeMs; + + @JsonProperty(value = "BeginTxnTimeMs") + private int BeginTxnTimeMs; + + @JsonProperty(value = "StreamLoadPutTimeMs") + private int StreamLoadPutTimeMs; + + @JsonProperty(value = "ReadDataTimeMs") + private int ReadDataTimeMs; + + @JsonProperty(value = "WriteDataTimeMs") + private int WriteDataTimeMs; + + @JsonProperty(value = "CommitAndPublishTimeMs") + private int CommitAndPublishTimeMs; + + @JsonProperty(value = "ErrorURL") + private String ErrorURL; + + public long getTxnId() { + return TxnId; + } + + public String getStatus() { + return Status; + } + + public String getTwoPhaseCommit() { + return TwoPhaseCommit; + } + + public String getMessage() { + return Message; + } + + public String getExistingJobStatus() { + return ExistingJobStatus; + } + + @Override + public String toString() { + ObjectMapper mapper = new ObjectMapper(); + try { + return mapper.writeValueAsString(this); + } catch (JsonProcessingException e) { + return ""; + } + } + + public String getErrorURL() { + return ErrorURL; + } + +} diff --git a/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/exception/DorisException.java b/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/exception/DorisException.java new file mode 100644 index 0000000..c416bd6 --- /dev/null +++ b/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/exception/DorisException.java @@ -0,0 
+1,32 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.doris.exception; + +public class DorisException extends Exception { + + public DorisException() { + super(); + } + + public DorisException(String message) { + super(message); + } + + public DorisException(String message, Throwable cause) { + super(message, cause); + } + + public DorisException(Throwable cause) { + super(cause); + } + + protected DorisException(String message, + Throwable cause, + boolean enableSuppression, + boolean writableStackTrace) { + super(message, cause, enableSuppression, writableStackTrace); + } + +} diff --git a/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/exception/DorisRuntimeException.java b/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/exception/DorisRuntimeException.java new file mode 100644 index 0000000..b749607 --- /dev/null +++ b/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/exception/DorisRuntimeException.java @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.doris.exception; + +/** + * Doris runtime exception. + */ +public class DorisRuntimeException extends RuntimeException { + + public DorisRuntimeException() { + super(); + } + + public DorisRuntimeException(String message) { + super(message); + } + + public DorisRuntimeException(String message, Throwable cause) { + super(message, cause); + } + + public DorisRuntimeException(Throwable cause) { + super(cause); + } + + protected DorisRuntimeException(String message, + Throwable cause, + boolean enableSuppression, + boolean writableStackTrace) { + super(message, cause, enableSuppression, writableStackTrace); + } + +} diff --git a/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/exception/IllegalArgumentException.java b/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/exception/IllegalArgumentException.java new file mode 100644 index 0000000..bc0995d --- /dev/null +++ b/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/exception/IllegalArgumentException.java @@ -0,0 +1,17 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.doris.exception; + +public class IllegalArgumentException extends DorisException { + + public IllegalArgumentException(String msg, Throwable cause) { + super(msg, cause); + } + + public IllegalArgumentException(String arg, String value) { + super("argument '" + arg + "' is illegal, value is '" + value + "'."); + } + +} diff --git a/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/exception/StreamLoadException.java b/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/exception/StreamLoadException.java new file mode 100644 index 0000000..50d012f --- /dev/null +++ b/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/exception/StreamLoadException.java @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.doris.exception; + +public class StreamLoadException extends Exception { + + public StreamLoadException() { + super(); + } + + public StreamLoadException(String message) { + super(message); + } + + public StreamLoadException(String message, Throwable cause) { + super(message, cause); + } + + public StreamLoadException(Throwable cause) { + super(cause); + } + + protected StreamLoadException(String message, + Throwable cause, + boolean enableSuppression, + boolean writableStackTrace) { + super(message, cause, enableSuppression, writableStackTrace); + } + +} diff --git a/airbyte-integrations/connectors/destination-doris/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-doris/src/main/resources/spec.json new file mode 100644 index 0000000..42cddd0 --- /dev/null +++ b/airbyte-integrations/connectors/destination-doris/src/main/resources/spec.json @@ -0,0 +1,60 @@ +{ + "documentationUrl": "https://docs.airbyte.io/integrations/destinations/doris", + "supportsIncremental": false, + "supportsNormalization": false, + "supportsDBT": false, + "supported_destination_sync_modes": ["append", "overwrite"], + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Doris Destination Spec", + "type": "object", + "required": ["host", "httpport", "queryport", "username", "database"], + "properties": { + "host": { + "title": "Host", + "description": "Hostname of the database", + "type": "string", + "order": 0 + }, + "httpport": { + "title": "HttpPort", + "description": "Http Port of the database.", + "type": "integer", + "minimum": 0, + "maximum": 65536, + "default": 8030, + "examples": ["8030"], + "order": 1 + }, + "queryport": { + "title": "QueryPort", + "description": "Query(SQL) Port of the database.", + "type": "integer", + "minimum": 0, + "maximum": 65536, + "default": 9030, + "examples": ["9030"], + "order": 2 + }, + "database": { + "title": "DataBase Name", + "description": "Name of the database.", + "type": "string", + "order": 3 + }, + "username": { + "title": "UserName", + "description": "Username to use to access the database.", + "type": "string", + "order": 4 + }, + "password": { + "title": "Password", + "description": "Password associated with the username.", + "type": "string", + "airbyte_secret": true, + "order": 5 + } + } + } +} diff --git a/airbyte-integrations/connectors/destination-doris/src/test-integration/java/io/airbyte/integrations/destination/doris/DorisDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-doris/src/test-integration/java/io/airbyte/integrations/destination/doris/DorisDestinationAcceptanceTest.java new file mode 100644 index 0000000..b2e8ddd --- /dev/null +++ b/airbyte-integrations/connectors/destination-doris/src/test-integration/java/io/airbyte/integrations/destination/doris/DorisDestinationAcceptanceTest.java @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.doris; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.cdk.integrations.base.JavaBaseConstants; +import io.airbyte.cdk.integrations.destination.StandardNameTransformer; +import io.airbyte.cdk.integrations.standardtest.destination.DestinationAcceptanceTest; +import io.airbyte.commons.io.IOs; +import io.airbyte.commons.json.Jsons; +import java.io.IOException; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.sql.*; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import org.apache.commons.lang3.StringEscapeUtils; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class DorisDestinationAcceptanceTest extends DestinationAcceptanceTest { + + private static final Logger LOGGER = LoggerFactory.getLogger(DorisDestinationAcceptanceTest.class); + + private JsonNode configJson; + + private static final Path RELATIVE_PATH = Path.of("integration_test/test"); + + private static final String JDBC_DRIVER = "com.mysql.cj.jdbc.Driver"; + private static final String DB_URL_PATTERN = "jdbc:mysql://%s:%d?rewriteBatchedStatements=true&useSSL=true&useUnicode=true&characterEncoding=utf8"; + private static final int PORT = 8211; + private static Connection conn = null; + + private static final StandardNameTransformer namingResolver = new StandardNameTransformer(); + + @Override + protected String getImageName() { + return "airbyte/destination-doris:dev"; + } + + @BeforeAll + public static void getConnect() { + final JsonNode config = Jsons.deserialize(IOs.readFile(Paths.get("../../../secrets/config.json"))); + final String dbUrl = String.format(DB_URL_PATTERN, config.get("host").asText(), PORT); + try { + Class.forName(JDBC_DRIVER); + conn = + DriverManager.getConnection(dbUrl, config.get("username").asText(), config.get("password") == null ? "" : config.get("password").asText()); + } catch (final Exception e) { + e.printStackTrace(); + } + + } + + @AfterAll + public static void closeConnect() throws SQLException { + if (conn != null) { + conn.close(); + } + } + + @Override + protected JsonNode getConfig() { + // TODO: Generate the configuration JSON file to be used for running the destination during the test + // configJson can either be static and read from secrets/config.json directly + // or created in the setup method + configJson = Jsons.deserialize(IOs.readFile(Paths.get("../../../secrets/config.json"))); + return configJson; + } + + @Override + protected JsonNode getFailCheckConfig() { + // TODO return an invalid config which, when used to run the connector's check connection operation, + // should result in a failed connection check + return null; + } + + @Override + protected List retrieveRecords(final TestDestinationEnv testEnv, + final String streamName, + final String namespace, + final JsonNode streamSchema) + throws IOException, SQLException { + // TODO Implement this method to retrieve records which written to the destination by the connector. 
+ // Records returned from this method will be compared against records provided to the connector + // to verify they were written correctly + + final String tableName = namingResolver.getIdentifier(streamName); + + final String query = String.format( + "SELECT * FROM %s.%s ORDER BY %s ASC;", configJson.get("database").asText(), tableName, + JavaBaseConstants.COLUMN_NAME_EMITTED_AT); + final PreparedStatement stmt = conn.prepareStatement(query); + final ResultSet resultSet = stmt.executeQuery(); + + final List res = new ArrayList<>(); + while (resultSet.next()) { + final String sss = resultSet.getString(JavaBaseConstants.COLUMN_NAME_DATA); + res.add(Jsons.deserialize(StringEscapeUtils.unescapeJava(sss))); + } + stmt.close(); + return res; + } + + @Override + protected void setup(final TestDestinationEnv testEnv, final HashSet TEST_SCHEMAS) { + // TODO Implement this method to run any setup actions needed before every test case + } + + @Override + protected void tearDown(final TestDestinationEnv testEnv) { + // TODO Implement this method to run any cleanup actions needed after every test case + } + + public void testLineBreakCharacters() { + // overrides test with a no-op until we handle full UTF-8 in the destination + } + + public void testSecondSync() throws Exception { + // PubSub cannot overwrite messages, its always append only + } + +} diff --git a/airbyte-integrations/connectors/destination-doris/src/test/java/io/airbyte/integrations/destination/doris/DorisDestinationTest.java b/airbyte-integrations/connectors/destination-doris/src/test/java/io/airbyte/integrations/destination/doris/DorisDestinationTest.java new file mode 100644 index 0000000..d98a37b --- /dev/null +++ b/airbyte-integrations/connectors/destination-doris/src/test/java/io/airbyte/integrations/destination/doris/DorisDestinationTest.java @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.doris; + +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.*; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; +import io.airbyte.cdk.integrations.base.AirbyteMessageConsumer; +import io.airbyte.cdk.integrations.base.Destination; +import io.airbyte.cdk.integrations.destination.StandardNameTransformer; +import io.airbyte.commons.io.IOs; +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.resources.MoreResources; +import io.airbyte.protocol.models.Field; +import io.airbyte.protocol.models.JsonSchemaType; +import io.airbyte.protocol.models.v0.AirbyteConnectionStatus; +import io.airbyte.protocol.models.v0.AirbyteConnectionStatus.Status; +import io.airbyte.protocol.models.v0.AirbyteMessage; +import io.airbyte.protocol.models.v0.AirbyteRecordMessage; +import io.airbyte.protocol.models.v0.AirbyteStateMessage; +import io.airbyte.protocol.models.v0.CatalogHelpers; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.v0.ConnectorSpecification; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.time.Instant; +import java.util.Collections; +import java.util.Set; +import java.util.stream.Collectors; +import org.apache.commons.io.FileUtils; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +class DorisDestinationTest { + + private static final Instant NOW = Instant.now(); + private static final Path TEST_ROOT = Path.of("/tmp/airbyte_tests"); + private static final String USERS_STREAM_NAME = "users"; + private static final String TASKS_STREAM_NAME = "tasks"; + private static final String USERS_FILE = new StandardNameTransformer().getRawTableName(USERS_STREAM_NAME) + ".csv"; + private static final String TASKS_FILE = new StandardNameTransformer().getRawTableName(TASKS_STREAM_NAME) + ".csv";; + private static final AirbyteMessage MESSAGE_USERS1 = new AirbyteMessage().withType(AirbyteMessage.Type.RECORD) + .withRecord(new AirbyteRecordMessage().withStream(USERS_STREAM_NAME) + .withData(Jsons.jsonNode(ImmutableMap.builder().put("name", "john").put("id", "10").build())) + .withEmittedAt(NOW.toEpochMilli())); + private static final AirbyteMessage MESSAGE_USERS2 = new AirbyteMessage().withType(AirbyteMessage.Type.RECORD) + .withRecord(new AirbyteRecordMessage().withStream(USERS_STREAM_NAME) + .withData(Jsons.jsonNode(ImmutableMap.builder().put("name", "susan").put("id", "30").build())) + .withEmittedAt(NOW.toEpochMilli())); + private static final AirbyteMessage MESSAGE_TASKS1 = new AirbyteMessage().withType(AirbyteMessage.Type.RECORD) + .withRecord(new AirbyteRecordMessage().withStream(TASKS_STREAM_NAME) + .withData(Jsons.jsonNode(ImmutableMap.builder().put("goal", "game").build())) + .withEmittedAt(NOW.toEpochMilli())); + private static final AirbyteMessage MESSAGE_TASKS2 = new AirbyteMessage().withType(AirbyteMessage.Type.RECORD) + .withRecord(new AirbyteRecordMessage().withStream(TASKS_STREAM_NAME) + .withData(Jsons.jsonNode(ImmutableMap.builder().put("goal", "code").build())) + .withEmittedAt(NOW.toEpochMilli())); + private static final AirbyteMessage MESSAGE_STATE = new AirbyteMessage().withType(AirbyteMessage.Type.STATE) + .withState(new 
AirbyteStateMessage().withData(Jsons.jsonNode(ImmutableMap.builder().put("checkpoint", "now!").build()))); + + private static final ConfiguredAirbyteCatalog CATALOG = new ConfiguredAirbyteCatalog().withStreams(Lists.newArrayList( + CatalogHelpers.createConfiguredAirbyteStream(USERS_STREAM_NAME, null, Field.of("name", JsonSchemaType.STRING), + Field.of("id", JsonSchemaType.STRING)), + CatalogHelpers.createConfiguredAirbyteStream(TASKS_STREAM_NAME, null, Field.of("goal", JsonSchemaType.STRING)))); + + private Path destinationPath; + private JsonNode config; + + @BeforeEach + void setup() throws IOException { + destinationPath = Files.createTempDirectory(Files.createDirectories(TEST_ROOT), "test"); + config = Jsons.deserialize(IOs.readFile(Paths.get("../../../secrets/config.json"))); + } + + private DorisDestination getDestination() { + final DorisDestination result = spy(DorisDestination.class); + doReturn(destinationPath).when(result).getTempPathDir(any()); + return result; + } + + @Test + void testSpec() throws Exception { + final ConnectorSpecification actual = getDestination().spec(); + final String resourceString = MoreResources.readResource("spec.json"); + final ConnectorSpecification expected = Jsons.deserialize(resourceString, ConnectorSpecification.class); + + assertEquals(expected, actual); + } + + @Test + void testCheckSuccess() { + final AirbyteConnectionStatus actual = getDestination().check(config); + final AirbyteConnectionStatus expected = new AirbyteConnectionStatus().withStatus(Status.SUCCEEDED); + assertEquals(expected, actual); + } + + @Test + void testCheckFailure() throws IOException { + final Path looksLikeADirectoryButIsAFile = destinationPath.resolve("file"); + FileUtils.touch(looksLikeADirectoryButIsAFile.toFile()); + final DorisDestination destination = spy(DorisDestination.class); + doReturn(looksLikeADirectoryButIsAFile).when(destination).getTempPathDir(any()); + // final JsonNode config = + // Jsons.jsonNode(ImmutableMap.of(DorisDestination.DESTINATION_TEMP_PATH_FIELD, + // looksLikeADirectoryButIsAFile.toString())); + final AirbyteConnectionStatus actual = destination.check(config); + final AirbyteConnectionStatus expected = new AirbyteConnectionStatus().withStatus(Status.FAILED); + + // the message includes the random file path, so just verify it exists and then remove it when we do + // rest of the comparison. + assertNotNull(actual.getMessage()); + actual.setMessage(null); + assertEquals(expected, actual); + } + + @Test + void testCheckInvalidDestinationFolder() { + // final Path relativePath = Path.of("../tmp/conf.d/"); + // final JsonNode config = + // Jsons.jsonNode(ImmutableMap.of(DorisDestination.DESTINATION_TEMP_PATH_FIELD, + // relativePath.toString())); + final AirbyteConnectionStatus actual = new DorisDestination().check(config); + final AirbyteConnectionStatus expected = new AirbyteConnectionStatus().withStatus(Status.FAILED); + // the message includes the random file path, so just verify it exists and then remove it when we do + // rest of the comparison. 
+ assertNotNull(actual.getMessage()); + actual.setMessage(null); + assertEquals(expected, actual); + } + + @Test + void testWriteSuccess() throws Exception { + DorisDestination destination = getDestination(); + destination.check(config); + final AirbyteMessageConsumer consumer = destination.getConsumer(config, CATALOG, Destination::defaultOutputRecordCollector); + consumer.accept(MESSAGE_USERS1); + consumer.accept(MESSAGE_TASKS1); + consumer.accept(MESSAGE_USERS2); + consumer.accept(MESSAGE_TASKS2); + consumer.accept(MESSAGE_STATE); + consumer.close(); + + } + + @SuppressWarnings("ResultOfMethodCallIgnored") + @Test + void testWriteFailure() throws Exception { + // hack to force an exception to be thrown from within the consumer. + final AirbyteMessage spiedMessage = spy(MESSAGE_USERS1); + doThrow(new RuntimeException()).when(spiedMessage).getRecord(); + DorisDestination destination = getDestination(); + destination.check(config); + final AirbyteMessageConsumer consumer = spy(destination.getConsumer(config, CATALOG, Destination::defaultOutputRecordCollector)); + + assertThrows(RuntimeException.class, () -> consumer.accept(spiedMessage)); + consumer.accept(MESSAGE_USERS2); + assertThrows(IOException.class, consumer::close); + + // verify tmp files are cleaned up and no files are output at all + final Set actualFilenames = Files.list(destinationPath).map(Path::getFileName).map(Path::toString).collect(Collectors.toSet()); + assertEquals(Collections.emptySet(), actualFilenames); + } + +} diff --git a/airbyte-integrations/connectors/destination-exasol/README.md b/airbyte-integrations/connectors/destination-exasol/README.md new file mode 100644 index 0000000..8651db3 --- /dev/null +++ b/airbyte-integrations/connectors/destination-exasol/README.md @@ -0,0 +1,71 @@ +# Destination Exasol + +This is the repository for the Exasol destination connector in Java. +For information about how to use this connector within Airbyte, see [the User Documentation](https://docs.airbyte.io/integrations/destinations/exasol). + +## Local development + +#### Building via Gradle +From the Airbyte repository root, run: +``` +./gradlew :airbyte-integrations:connectors:destination-exasol:build +``` + +#### Create credentials +**If you are a community contributor**, generate the necessary credentials and place them in `secrets/config.json` conforming to the spec file in `src/main/resources/spec.json`. +Note that the `secrets` directory is git-ignored by default, so there is no danger of accidentally checking in sensitive information. + +**If you are an Airbyte core member**, follow the [instructions](https://docs.airbyte.io/connector-development#using-credentials-in-ci) to set up the credentials. + +### Locally running the connector docker image + +#### Build +Build the connector image via Gradle: + +``` +./gradlew :airbyte-integrations:connectors:destination-exasol:buildConnectorImage +``` +Once built, the docker image name and tag on your host will be `airbyte/destination-exasol:dev`. +the Dockerfile. 
+
+#### Run
+Then run any of the connector commands as follows:
+```
+docker run --rm airbyte/destination-exasol:dev spec
+docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-exasol:dev check --config /secrets/config.json
+docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-exasol:dev discover --config /secrets/config.json
+docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/destination-exasol:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json
+```
+
+## Testing
+The connector uses `JUnit` for Java tests.
+
+### Unit and Integration Tests
+Place unit tests under `src/test/io/airbyte/integrations/destinations/exasol`.
+
+#### Acceptance Tests
+Airbyte has a standard test suite that all destination connectors must pass.
+
+### Using gradle to run tests
+All commands should be run from the Airbyte project root.
+To run unit tests:
+```
+./gradlew :airbyte-integrations:connectors:destination-exasol:unitTest
+```
+To run acceptance and custom integration tests:
+```
+./gradlew :airbyte-integrations:connectors:destination-exasol:integrationTest
+```
+
+## Dependency Management
+
+### Publishing a new version of the connector
+You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what?
+1. Make sure your changes are passing our test suite: `airbyte-ci connectors --name=destination-exasol test`
+2. Bump the connector version in `metadata.yaml`: increment the `dockerImageTag` value. Please follow [semantic versioning for connectors](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#semantic-versioning-for-connectors).
+3. Make sure the `metadata.yaml` content is up to date.
+4. Make sure the connector documentation and its changelog are up to date (`docs/integrations/destinations/exasol.md`).
+5. Create a Pull Request: use [our PR naming conventions](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#pull-request-title-convention).
+6. Pat yourself on the back for being an awesome contributor.
+7. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master.
+
diff --git a/airbyte-integrations/connectors/destination-exasol/bootstrap.md b/airbyte-integrations/connectors/destination-exasol/bootstrap.md
new file mode 100644
index 0000000..f3342f5
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-exasol/bootstrap.md
@@ -0,0 +1,19 @@
+# Exasol
+
+## Overview
+
+Exasol is the in-memory database built for analytics.
+
+## Endpoints
+
+The destination-exasol connector uses the official [Exasol JDBC driver](https://docs.exasol.com/db/latest/connect_exasol/drivers/jdbc.htm).
+
+## Quick Notes
+
+- TLS connections are used by default. If the Exasol database uses a self-signed certificate, specify the certificate fingerprint (see the example config below).
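+
+For illustration, a destination config that pins the fingerprint of a self-signed certificate could look like the sketch below. The field names come from the connector's `src/main/resources/spec.json`; the host, fingerprint, and credentials are placeholders, not real values.
+
+```
+{
+  "host": "exasol.example.com",
+  "port": 8563,
+  "certificateFingerprint": "ABC123...",
+  "username": "airbyte_user",
+  "password": "secret",
+  "schema": "AIRBYTE"
+}
+```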
+ +## Reference + +- [Exasol homepage](https://www.exasol.com/) +- [Exasol documentation](https://docs.exasol.com/db/latest/home.htm) +- [Exasol JDBC driver documentation](https://docs.exasol.com/db/latest/connect_exasol/drivers/jdbc.htm) diff --git a/airbyte-integrations/connectors/destination-exasol/build.gradle b/airbyte-integrations/connectors/destination-exasol/build.gradle new file mode 100644 index 0000000..3380731 --- /dev/null +++ b/airbyte-integrations/connectors/destination-exasol/build.gradle @@ -0,0 +1,37 @@ +plugins { + id 'application' + id 'airbyte-java-connector' +} + +airbyteJavaConnector { + cdkVersionRequired = '0.2.0' + features = ['db-destinations'] + useLocalCdk = false +} + +//remove once upgrading the CDK version to 0.4.x or later +java { + compileJava { + options.compilerArgs.remove("-Werror") + } +} + +airbyteJavaConnector.addCdkDependencies() + +application { + mainClass = 'io.airbyte.integrations.destination.exasol.ExasolDestination' +} + +dependencies { + + implementation 'com.exasol:exasol-jdbc:7.1.17' + + testImplementation 'org.hamcrest:hamcrest-all:1.3' + + // Explicitly upgrade testcontainers to avoid java.lang.NoSuchMethodError: + // 'org.testcontainers.containers.GenericContainer com.exasol.containers.ExasolContainer.withCopyToContainer(org.testcontainers.images.builder.Transferable, java.lang.String)' + testImplementation 'org.testcontainers:testcontainers:1.17.6' + + integrationTestJavaImplementation 'com.exasol:exasol-testcontainers:6.5.0' + integrationTestJavaImplementation 'org.testcontainers:testcontainers:1.17.6' +} diff --git a/airbyte-integrations/connectors/destination-exasol/metadata.yaml b/airbyte-integrations/connectors/destination-exasol/metadata.yaml new file mode 100644 index 0000000..90b0a6b --- /dev/null +++ b/airbyte-integrations/connectors/destination-exasol/metadata.yaml @@ -0,0 +1,23 @@ +data: + connectorSubtype: database + connectorType: destination + definitionId: bb6071d9-6f34-4766-bec2-d1d4ed81a653 + dockerImageTag: 0.1.1 + dockerRepository: airbyte/destination-exasol + githubIssueLabel: destination-exasol + license: MIT + name: Exasol + registries: + cloud: + enabled: false + oss: + enabled: false + releaseStage: alpha + documentationUrl: https://docs.airbyte.com/integrations/destinations/exasol + tags: + - language:java + ab_internal: + sl: 100 + ql: 100 + supportLevel: archived +metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/destination-exasol/src/main/java/io/airbyte/integrations/destination/exasol/ExasolDestination.java b/airbyte-integrations/connectors/destination-exasol/src/main/java/io/airbyte/integrations/destination/exasol/ExasolDestination.java new file mode 100644 index 0000000..8145c85 --- /dev/null +++ b/airbyte-integrations/connectors/destination-exasol/src/main/java/io/airbyte/integrations/destination/exasol/ExasolDestination.java @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.exasol; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import io.airbyte.cdk.db.factory.DatabaseDriver; +import io.airbyte.cdk.db.jdbc.JdbcUtils; +import io.airbyte.cdk.integrations.base.Destination; +import io.airbyte.cdk.integrations.base.IntegrationRunner; +import io.airbyte.cdk.integrations.destination.jdbc.AbstractJdbcDestination; +import io.airbyte.commons.json.Jsons; +import java.util.HashMap; +import java.util.Map; + +public class ExasolDestination extends AbstractJdbcDestination implements Destination { + + public static final String DRIVER_CLASS = DatabaseDriver.EXASOL.getDriverClassName(); + + public ExasolDestination() { + super(DRIVER_CLASS, new ExasolSQLNameTransformer(), new ExasolSqlOperations()); + } + + public static void main(String[] args) throws Exception { + new IntegrationRunner(new ExasolDestination()).run(args); + } + + @Override + public JsonNode toJdbcConfig(final JsonNode config) { + final String jdbcUrl = String.format(DatabaseDriver.EXASOL.getUrlFormatString(), + config.get(JdbcUtils.HOST_KEY).asText(), config.get(JdbcUtils.PORT_KEY).asInt()); + + final ImmutableMap.Builder configBuilder = ImmutableMap.builder() + .put(JdbcUtils.USERNAME_KEY, config.get(JdbcUtils.USERNAME_KEY).asText()) + .put(JdbcUtils.JDBC_URL_KEY, jdbcUrl) + .put("schema", config.get(JdbcUtils.SCHEMA_KEY).asText()); + + if (config.has(JdbcUtils.PASSWORD_KEY)) { + configBuilder.put(JdbcUtils.PASSWORD_KEY, config.get(JdbcUtils.PASSWORD_KEY).asText()); + } + + if (config.has(JdbcUtils.JDBC_URL_PARAMS_KEY)) { + configBuilder.put(JdbcUtils.JDBC_URL_PARAMS_KEY, config.get(JdbcUtils.JDBC_URL_PARAMS_KEY).asText()); + } + + return Jsons.jsonNode(configBuilder.build()); + } + + @Override + protected Map getDefaultConnectionProperties(final JsonNode config) { + Map properties = new HashMap<>(); + properties.put("autocommit", "0"); + if (config.has("certificateFingerprint")) { + properties.put("fingerprint", config.get("certificateFingerprint").asText()); + } + return properties; + } + +} diff --git a/airbyte-integrations/connectors/destination-exasol/src/main/java/io/airbyte/integrations/destination/exasol/ExasolSQLNameTransformer.java b/airbyte-integrations/connectors/destination-exasol/src/main/java/io/airbyte/integrations/destination/exasol/ExasolSQLNameTransformer.java new file mode 100644 index 0000000..8fd3caf --- /dev/null +++ b/airbyte-integrations/connectors/destination-exasol/src/main/java/io/airbyte/integrations/destination/exasol/ExasolSQLNameTransformer.java @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.exasol; + +import io.airbyte.cdk.integrations.destination.StandardNameTransformer; +import io.airbyte.commons.text.Names; + +public class ExasolSQLNameTransformer extends StandardNameTransformer { + + @Override + public String applyDefaultCase(final String input) { + return input.toUpperCase(); + } + + @Override + public String getRawTableName(final String streamName) { + // Exasol identifiers starting with _ must be quoted + return Names.doubleQuote(super.getRawTableName(streamName)); + } + + @Override + public String getTmpTableName(final String streamName) { + // Exasol identifiers starting with _ must be quoted + return Names.doubleQuote(super.getTmpTableName(streamName)); + } + + @Override + public String convertStreamName(final String input) { + // Sometimes the stream name is already quoted, so remove quotes before converting. + // Exasol identifiers starting with _ must be quoted. + return Names.doubleQuote(super.convertStreamName(unquote(input))); + } + + private static String unquote(final String input) { + String result = input; + if (result.startsWith("\"")) { + result = result.substring(1); + } + if (result.endsWith("\"")) { + result = result.substring(0, result.length() - 1); + } + return result; + } + +} diff --git a/airbyte-integrations/connectors/destination-exasol/src/main/java/io/airbyte/integrations/destination/exasol/ExasolSqlOperations.java b/airbyte-integrations/connectors/destination-exasol/src/main/java/io/airbyte/integrations/destination/exasol/ExasolSqlOperations.java new file mode 100644 index 0000000..e0353bd --- /dev/null +++ b/airbyte-integrations/connectors/destination-exasol/src/main/java/io/airbyte/integrations/destination/exasol/ExasolSqlOperations.java @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.exasol; + +import io.airbyte.cdk.db.jdbc.JdbcDatabase; +import io.airbyte.cdk.integrations.base.JavaBaseConstants; +import io.airbyte.cdk.integrations.destination.jdbc.JdbcSqlOperations; +import io.airbyte.protocol.models.v0.AirbyteRecordMessage; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; + +public class ExasolSqlOperations extends JdbcSqlOperations { + + public static final String COLUMN_NAME_AB_ID = + "\"" + JavaBaseConstants.COLUMN_NAME_AB_ID.toUpperCase() + "\""; + public static final String COLUMN_NAME_DATA = + "\"" + JavaBaseConstants.COLUMN_NAME_DATA.toUpperCase() + "\""; + public static final String COLUMN_NAME_EMITTED_AT = + "\"" + JavaBaseConstants.COLUMN_NAME_EMITTED_AT.toUpperCase() + "\""; + + @Override + public String createTableQuery(final JdbcDatabase database, final String schemaName, final String tableName) { + String query = String.format(""" + CREATE TABLE IF NOT EXISTS %s.%s ( + %s VARCHAR(64), + %s VARCHAR(2000000), + %s TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY(%s) + )""", + schemaName, tableName, + ExasolSqlOperations.COLUMN_NAME_AB_ID, + ExasolSqlOperations.COLUMN_NAME_DATA, + ExasolSqlOperations.COLUMN_NAME_EMITTED_AT, + ExasolSqlOperations.COLUMN_NAME_AB_ID); + LOGGER.info("Create table query: {}", query); + return query; + } + + @Override + public void executeTransaction(final JdbcDatabase database, final List queries) throws Exception { + database.executeWithinTransaction(queries); + } + + @Override + protected void insertRecordsInternal(JdbcDatabase database, List records, String schemaName, String tableName) + throws Exception { + if (records.isEmpty()) { + return; + } + Path tmpFile = createBatchFile(tableName, records); + try { + String importStatement = String.format(""" + IMPORT INTO %s.%s + FROM LOCAL CSV FILE '%s' + ROW SEPARATOR = 'CRLF' + COLUMN SEPARATOR = ','""", schemaName, tableName, tmpFile.toAbsolutePath()); + LOGGER.info("IMPORT statement: {}", importStatement); + database.execute(connection -> connection.createStatement().execute(importStatement)); + } finally { + Files.delete(tmpFile); + } + } + + private Path createBatchFile(String tableName, List records) throws Exception { + Path tmpFile = Files.createTempFile(tableName + "-", ".tmp"); + writeBatchToFile(tmpFile.toFile(), records); + return tmpFile; + } + +} diff --git a/airbyte-integrations/connectors/destination-exasol/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-exasol/src/main/resources/spec.json new file mode 100644 index 0000000..865270c --- /dev/null +++ b/airbyte-integrations/connectors/destination-exasol/src/main/resources/spec.json @@ -0,0 +1,64 @@ +{ + "documentationUrl": "https://docs.airbyte.com/integrations/destinations/exasol", + "supportsIncremental": true, + "supportsNormalization": false, + "supportsDBT": false, + "supported_destination_sync_modes": ["overwrite", "append"], + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Exasol Destination Spec", + "type": "object", + "required": ["host", "port", "username", "schema"], + "additionalProperties": true, + "properties": { + "host": { + "title": "Host", + "description": "Hostname of the database.", + "type": "string", + "order": 0 + }, + "port": { + "title": "Port", + "description": "Port of the database.", + "type": "integer", + "minimum": 0, + "maximum": 65536, + "default": 8563, + "examples": ["8563"], + "order": 1 + }, + "certificateFingerprint": 
{ + "title": "Certificate Fingerprint", + "description": "Fingerprint of the Exasol server's TLS certificate", + "type": "string", + "examples": ["ABC123..."], + "order": 2 + }, + "username": { + "title": "User", + "description": "Username to use to access the database.", + "type": "string", + "order": 3 + }, + "password": { + "title": "Password", + "description": "Password associated with the username.", + "type": "string", + "airbyte_secret": true, + "order": 4 + }, + "schema": { + "title": "Schema Name", + "description": "Schema Name", + "type": "string", + "order": 5 + }, + "jdbc_url_params": { + "description": "Additional properties to pass to the JDBC URL string when connecting to the database formatted as 'key=value' pairs separated by the symbol ';'. (example: key1=value1;key2=value2;key3=value3).", + "title": "JDBC URL Params", + "type": "string", + "order": 6 + } + } + } +} diff --git a/airbyte-integrations/connectors/destination-exasol/src/test-integration/java/io/airbyte/integrations/destination/exasol/ExasolDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-exasol/src/test-integration/java/io/airbyte/integrations/destination/exasol/ExasolDestinationAcceptanceTest.java new file mode 100644 index 0000000..8fd01ec --- /dev/null +++ b/airbyte-integrations/connectors/destination-exasol/src/test-integration/java/io/airbyte/integrations/destination/exasol/ExasolDestinationAcceptanceTest.java @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.exasol; + +import com.exasol.containers.ExasolContainer; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.google.common.collect.ImmutableMap; +import io.airbyte.cdk.db.Database; +import io.airbyte.cdk.db.factory.DSLContextFactory; +import io.airbyte.cdk.db.factory.DatabaseDriver; +import io.airbyte.cdk.db.jdbc.JdbcUtils; +import io.airbyte.cdk.integrations.base.JavaBaseConstants; +import io.airbyte.cdk.integrations.destination.NamingConventionTransformer; +import io.airbyte.cdk.integrations.standardtest.destination.JdbcDestinationAcceptanceTest; +import io.airbyte.commons.json.Jsons; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import org.jooq.DSLContext; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class ExasolDestinationAcceptanceTest extends JdbcDestinationAcceptanceTest { + + private static final Logger LOGGER = LoggerFactory.getLogger(ExasolDestinationAcceptanceTest.class); + + private static final ExasolContainer> EXASOL = new ExasolContainer<>() + .withReuse(true); + + private final NamingConventionTransformer namingResolver = new ExasolSQLNameTransformer(); + private static JsonNode config; + + @BeforeAll + static void startExasolContainer() { + EXASOL.start(); + config = createExasolConfig(EXASOL); + } + + private static JsonNode createExasolConfig(final ExasolContainer> exasol) { + return Jsons.jsonNode(ImmutableMap.builder() + .put(JdbcUtils.HOST_KEY, exasol.getHost()) + .put(JdbcUtils.PORT_KEY, exasol.getFirstMappedDatabasePort()) + .put("certificateFingerprint", exasol.getTlsCertificateFingerprint().orElseThrow()) + .put(JdbcUtils.USERNAME_KEY, exasol.getUsername()) + .put(JdbcUtils.PASSWORD_KEY, exasol.getPassword()) + 
.put(JdbcUtils.SCHEMA_KEY, "TEST") + .build()); + } + + @AfterAll + static void stopExasolContainer() { + EXASOL.stop(); + } + + @Override + protected String getImageName() { + return "airbyte/destination-exasol:dev"; + } + + @Override + protected JsonNode getConfig() { + return Jsons.clone(config); + } + + @Override + protected JsonNode getFailCheckConfig() { + final JsonNode clone = Jsons.clone(getConfig()); + ((ObjectNode) clone).put(JdbcUtils.PASSWORD_KEY, "wrong password"); + return clone; + } + + @Override + protected boolean supportBasicDataTypeTest() { + return true; + } + + @Override + protected boolean supportArrayDataTypeTest() { + return true; + } + + @Override + protected boolean supportObjectDataTypeTest() { + return true; + } + + @Override + protected boolean implementsNamespaces() { + return true; + } + + @Override + protected List retrieveRecords(final TestDestinationEnv testEnv, + final String streamName, + final String namespace, + final JsonNode streamSchema) + throws SQLException { + return retrieveRecordsFromTable(namingResolver.getRawTableName(streamName), "\"" + namespace + "\"") + .stream() + .map(r -> r.get(JavaBaseConstants.COLUMN_NAME_DATA.toUpperCase())) + .map(node -> Jsons.deserialize(node.asText())) + .collect(Collectors.toList()); + } + + private List retrieveRecordsFromTable(final String tableName, final String schemaName) throws SQLException { + final String query = String.format("SELECT * FROM %s.%s ORDER BY %s ASC", schemaName, tableName, ExasolSqlOperations.COLUMN_NAME_EMITTED_AT); + LOGGER.info("Retrieving records using query {}", query); + try (final DSLContext dslContext = getDSLContext(config)) { + final List result = new Database(dslContext) + .query(ctx -> new ArrayList<>(ctx.fetch(query))); + return result + .stream() + .map(r -> r.formatJSON(JdbcUtils.getDefaultJSONFormat())) + .map(Jsons::deserialize) + .collect(Collectors.toList()); + } + } + + private static DSLContext getDSLContext(final JsonNode config) { + final String jdbcUrl = + String.format(DatabaseDriver.EXASOL.getUrlFormatString(), config.get(JdbcUtils.HOST_KEY).asText(), config.get(JdbcUtils.PORT_KEY).asInt()); + final Map jdbcConnectionProperties = Map.of("fingerprint", config.get("certificateFingerprint").asText()); + return DSLContextFactory.create( + config.get(JdbcUtils.USERNAME_KEY).asText(), + config.get(JdbcUtils.PASSWORD_KEY).asText(), + DatabaseDriver.EXASOL.getDriverClassName(), + jdbcUrl, + null, + jdbcConnectionProperties); + } + + @Override + protected void setup(final TestDestinationEnv testEnv, final HashSet TEST_SCHEMAS) { + // Nothing to do + } + + @Override + protected void tearDown(final TestDestinationEnv testEnv) { + EXASOL.purgeDatabase(); + } + +} diff --git a/airbyte-integrations/connectors/destination-exasol/src/test-integration/java/io/airbyte/integrations/destination/exasol/ExasolSqlOperationsAcceptanceTest.java b/airbyte-integrations/connectors/destination-exasol/src/test-integration/java/io/airbyte/integrations/destination/exasol/ExasolSqlOperationsAcceptanceTest.java new file mode 100644 index 0000000..dd32fea --- /dev/null +++ b/airbyte-integrations/connectors/destination-exasol/src/test-integration/java/io/airbyte/integrations/destination/exasol/ExasolSqlOperationsAcceptanceTest.java @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.exasol; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.*; +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import com.exasol.containers.ExasolContainer; +import io.airbyte.cdk.db.factory.DataSourceFactory; +import io.airbyte.cdk.db.jdbc.DefaultJdbcDatabase; +import io.airbyte.cdk.db.jdbc.JdbcDatabase; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.SQLSyntaxErrorException; +import java.util.Arrays; +import javax.sql.DataSource; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +class ExasolSqlOperationsAcceptanceTest { + + private static final ExasolContainer> EXASOL = new ExasolContainer<>() + .withReuse(true); + private ExasolSqlOperations operations; + + @BeforeAll + static void startExasolContainer() { + EXASOL.start(); + } + + @AfterAll + static void stopExasolContainer() { + EXASOL.stop(); + } + + @BeforeEach + void setup() { + this.operations = new ExasolSqlOperations(); + EXASOL.purgeDatabase(); + } + + @Test + void executeTransactionEmptyList() { + assertDoesNotThrow(() -> executeTransaction()); + } + + @Test + void executeTransactionSingleStatementSuccess() throws Exception { + executeTransaction("CREATE SCHEMA TESTING_SCHEMA"); + assertSchemaExists("TESTING_SCHEMA", true); + } + + @Test + void executeTransactionTowStatementsSuccess() throws Exception { + executeTransaction("CREATE SCHEMA TESTING_SCHEMA", "CREATE TABLE TESTING_TABLE (C1 VARCHAR(5))"); + assertSchemaExists("TESTING_SCHEMA", true); + assertTableExists("TESTING_SCHEMA", "TESTING_TABLE"); + } + + @Test + void executeTransactionTwoStatementsFailure() throws Exception { + assertThrows(SQLSyntaxErrorException.class, () -> executeTransaction("CREATE SCHEMA TESTING_SCHEMA", "INVALID STATEMENT")); + assertSchemaExists("TESTING_SCHEMA", false); + } + + private static void assertSchemaExists(String schemaName, boolean exists) throws SQLException { + try (ResultSet rs = EXASOL.createConnection().getMetaData().getSchemas(null, schemaName)) { + assertThat("Schema exists", rs.next(), equalTo(exists)); + } + } + + private static void assertTableExists(String schemaName, String tableName) throws SQLException { + try (ResultSet rs = EXASOL.createConnection().getMetaData().getTables(null, schemaName, tableName, null)) { + assertThat("Table exists", rs.next(), equalTo(true)); + } + } + + private void executeTransaction(String... 
statements) throws Exception { + this.operations.executeTransaction(createDatabase(), Arrays.asList(statements)); + } + + private JdbcDatabase createDatabase() { + DataSource dataSource = DataSourceFactory.create(EXASOL.getUsername(), EXASOL.getPassword(), ExasolDestination.DRIVER_CLASS, EXASOL.getJdbcUrl()); + return new DefaultJdbcDatabase(dataSource); + } + +} diff --git a/airbyte-integrations/connectors/destination-exasol/src/test/java/io/airbyte/integrations/destination/exasol/ExasolDestinationTest.java b/airbyte-integrations/connectors/destination-exasol/src/test/java/io/airbyte/integrations/destination/exasol/ExasolDestinationTest.java new file mode 100644 index 0000000..79789c1 --- /dev/null +++ b/airbyte-integrations/connectors/destination-exasol/src/test/java/io/airbyte/integrations/destination/exasol/ExasolDestinationTest.java @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.exasol; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.*; +import static org.junit.jupiter.api.Assertions.*; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.cdk.db.jdbc.JdbcUtils; +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.map.MoreMaps; +import java.util.HashMap; +import java.util.Map; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.testcontainers.shaded.com.google.common.collect.ImmutableMap; + +class ExasolDestinationTest { + + private ExasolDestination destination; + + @BeforeEach + void setup() { + destination = new ExasolDestination(); + } + + private JsonNode createConfig() { + return createConfig(new HashMap<>()); + } + + private JsonNode createConfig(final Map additionalConfigs) { + return Jsons.jsonNode(MoreMaps.merge(baseParameters(), additionalConfigs)); + } + + private Map baseParameters() { + return ImmutableMap.builder() + .put(JdbcUtils.HOST_KEY, "localhost") + .put(JdbcUtils.PORT_KEY, "8563") + .put(JdbcUtils.USERNAME_KEY, "sys") + .put(JdbcUtils.SCHEMA_KEY, "mySchema") + .build(); + } + + @Test + void toJdbcConfigDefault() { + var result = destination.toJdbcConfig(createConfig()); + assertAll( + () -> assertThat(result.size(), equalTo(3)), + () -> assertThat(result.get(JdbcUtils.USERNAME_KEY).asText(), equalTo("sys")), + () -> assertThat(result.get(JdbcUtils.JDBC_URL_KEY).asText(), equalTo("jdbc:exa:localhost:8563")), + () -> assertThat(result.get(JdbcUtils.SCHEMA_KEY).asText(), equalTo("mySchema"))); + } + + @Test + void toJdbcConfigWithPassword() { + var result = destination.toJdbcConfig(createConfig(Map.of(JdbcUtils.PASSWORD_KEY, "exasol"))); + assertAll( + () -> assertThat(result.size(), equalTo(4)), + () -> assertThat(result.get(JdbcUtils.PASSWORD_KEY).asText(), equalTo("exasol"))); + } + + @Test + void toJdbcConfigWithJdbcUrlParameters() { + var result = destination.toJdbcConfig(createConfig(Map.of(JdbcUtils.JDBC_URL_PARAMS_KEY, "param=value"))); + assertAll( + () -> assertThat(result.size(), equalTo(4)), + () -> assertThat(result.get(JdbcUtils.JDBC_URL_PARAMS_KEY).asText(), equalTo("param=value"))); + } + + @Test + void getDefaultConnectionProperties() { + var result = destination.getDefaultConnectionProperties(createConfig()); + assertThat(result, equalTo(Map.of("autocommit", "0"))); + } + + @Test + void getDefaultConnectionPropertiesWithFingerprint() { + var result = destination.getDefaultConnectionProperties(createConfig(Map.of("certificateFingerprint", "ABC"))); + 
assertThat(result, equalTo(Map.of("fingerprint", "ABC", "autocommit", "0"))); + } + +} diff --git a/airbyte-integrations/connectors/destination-exasol/src/test/java/io/airbyte/integrations/destination/exasol/ExasolSQLNameTransformerTest.java b/airbyte-integrations/connectors/destination-exasol/src/test/java/io/airbyte/integrations/destination/exasol/ExasolSQLNameTransformerTest.java new file mode 100644 index 0000000..e5dd08d --- /dev/null +++ b/airbyte-integrations/connectors/destination-exasol/src/test/java/io/airbyte/integrations/destination/exasol/ExasolSQLNameTransformerTest.java @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.exasol; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.*; +import static org.junit.jupiter.api.Assertions.*; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; + +class ExasolSQLNameTransformerTest { + + private ExasolSQLNameTransformer transformer; + + @BeforeEach + void setUp() { + transformer = new ExasolSQLNameTransformer(); + } + + @ParameterizedTest + @CsvSource({"text, TEXT", "Text, TEXT", "TEXT, TEXT", "_äöüß, _ÄÖÜSS"}) + void applyDefaultCase(String input, String expectedOutput) { + assertEquals(expectedOutput, transformer.applyDefaultCase(input)); + } + + @ParameterizedTest + @CsvSource({"stream, \"_airbyte_raw_stream\"", + "Stream, \"_airbyte_raw_Stream\"", + "stream*, \"_airbyte_raw_stream_\"", + "äöü, \"_airbyte_raw_aou\""}) + void getRawTableName(String streamName, String expectedTableName) { + assertEquals(expectedTableName, transformer.getRawTableName(streamName)); + } + + @Test + void getTmpTableNamePrefixSuffix() { + String tmpTableName = transformer.getTmpTableName("stream"); + assertThat(tmpTableName, allOf( + startsWith("\"_airbyte_tmp_"), + endsWith("_stream\""))); + } + + @Test + void getTmpTableNameDifferentForEachCall() { + String name1 = transformer.getTmpTableName("stream"); + String name2 = transformer.getTmpTableName("stream"); + assertThat(name1, not(equalTo(name2))); + } + + @ParameterizedTest + @CsvSource({"stream, stream", + "Stream, Stream", + "STREAM, STREAM", + "stream*, stream_", + "_stream_, _stream_", + "äöü, aou", + "\"stream, stream", + "stream\", stream", + "\"stream\", stream",}) + void convertStreamName(String streamName, String expectedTableName) { + assertThat(transformer.convertStreamName(streamName), equalTo("\"" + expectedTableName + "\"")); + } + +} diff --git a/airbyte-integrations/connectors/destination-firebolt/Dockerfile b/airbyte-integrations/connectors/destination-firebolt/Dockerfile new file mode 100644 index 0000000..01a8aed --- /dev/null +++ b/airbyte-integrations/connectors/destination-firebolt/Dockerfile @@ -0,0 +1,29 @@ +FROM python:3.9-slim as base + +# build and load all requirements +FROM base as builder +WORKDIR /airbyte/integration_code + +COPY setup.py ./ +# install necessary packages to a temporary folder +RUN pip3 install --prefix=/install --no-cache-dir . 
+ +# build a clean environment +FROM base +WORKDIR /airbyte/integration_code + +# copy all loaded and built libraries to a pure basic image +COPY --from=builder /install /usr/local +# add default timezone settings +COPY --from=builder /usr/share/zoneinfo/Etc/UTC /etc/localtime +RUN echo "Etc/UTC" > /etc/timezone + +# copy payload code only +COPY main.py ./ +COPY destination_firebolt ./destination_firebolt + +ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" +ENTRYPOINT ["python3", "/airbyte/integration_code/main.py"] + +LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.name=airbyte/destination-firebolt diff --git a/airbyte-integrations/connectors/destination-firebolt/README.md b/airbyte-integrations/connectors/destination-firebolt/README.md new file mode 100644 index 0000000..d19fb11 --- /dev/null +++ b/airbyte-integrations/connectors/destination-firebolt/README.md @@ -0,0 +1,99 @@ +# Firebolt Destination + +This is the repository for the Firebolt destination connector, written in Python. +For information about how to use this connector within Airbyte, see [the documentation](https://docs.airbyte.io/integrations/destinations/firebolt). + +## Local development + +### Prerequisites +**To iterate on this connector, make sure to complete this prerequisites section.** + +#### Minimum Python version required `= 3.7.0` + +#### Build & Activate Virtual Environment and install dependencies +From this connector directory, create a virtual environment: +``` +python -m venv .venv +``` + +This will generate a virtualenv for this module in `.venv/`. Make sure this venv is active in your +development environment of choice. To activate it from the terminal, run: +``` +source .venv/bin/activate +pip install -r requirements.txt +``` +If you are in an IDE, follow your IDE's instructions to activate the virtualenv. + +Note that while we are installing dependencies from `requirements.txt`, you should only edit `setup.py` for your dependencies. `requirements.txt` is +used for editable installs (`pip install -e`) to pull in Python dependencies from the monorepo and will call `setup.py`. +If this is mumbo jumbo to you, don't worry about it, just put your deps in `setup.py` but install using `pip install -r requirements.txt` and everything +should work as you expect. + +#### Create credentials +**If you are a community contributor**, follow the instructions in the [documentation](https://docs.airbyte.io/integrations/destinations/firebolt) +to generate the necessary credentials. Then create a file `secrets/config.json` conforming to the `destination_firebolt/spec.json` file. +Note that the `secrets` directory is gitignored by default, so there is no danger of accidentally checking in sensitive information. +See `integration_tests/sample_config.json` for a sample config file. + +**If you are an Airbyte core member**, copy the credentials in Lastpass under the secret name `destination firebolt test creds` +and place them into `secrets/config.json`. 
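+
+For illustration, a minimal `secrets/config.json` using the SQL loading method could look like the sketch below; every value is a placeholder, and the full set of options (including the S3 loading method) is described in `destination_firebolt/spec.json`.
+
+```
+{
+  "username": "username@email.com",
+  "password": "your_password",
+  "database": "airbyte_test",
+  "engine": "airbyte_engine",
+  "loading_method": {
+    "method": "SQL"
+  }
+}
+```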
+ +### Locally running the connector +``` +python main.py spec +python main.py check --config secrets/config.json +# messages.jsonl is a file containing line-separated JSON representing AirbyteMessages +cat integration_tests/messages.jsonl | python main.py write --config secrets/config_sql.json --catalog integration_tests/configured_catalog.json +``` + +### Locally running the connector docker image + + +#### Build +**Via [`airbyte-ci`](https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/pipelines/README.md) (recommended):** +```bash +airbyte-ci connectors --name=destination-firebolt build +``` + +An image will be built with the tag `airbyte/destination-firebolt:dev`. + +**Via `docker build`:** +```bash +docker build -t airbyte/destination-firebolt:dev . +``` + +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/destination-firebolt:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-firebolt:dev check --config /secrets/config.json +# messages.jsonl is a file containing line-separated JSON representing AirbyteMessages +cat integration_tests/messages.jsonl | docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/destination-firebolt:dev write --config /secrets/config.json --catalog /integration_tests/configured_catalog.json +``` + +## Testing +You can run our full test suite locally using [`airbyte-ci`](https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/pipelines/README.md): +```bash +airbyte-ci connectors --name=destination-firebolt test +``` + +### Customizing acceptance Tests +Customize `acceptance-test-config.yml` file to configure tests. See [Connector Acceptance Tests](https://docs.airbyte.com/connector-development/testing-connectors/connector-acceptance-tests-reference) for more information. +If your connector requires to create or destroy resources for use during acceptance tests create fixtures for it and place them inside integration_tests/acceptance.py. + +## Dependency Management +All of your dependencies should go in `setup.py`, NOT `requirements.txt`. The requirements file is only used to connect internal Airbyte dependencies in the monorepo for local development. +We split dependencies between two groups, dependencies that are: +* required for your connector to work need to go to `MAIN_REQUIREMENTS` list. +* required for the testing need to go to `TEST_REQUIREMENTS` list + +### Publishing a new version of the connector +You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what? +1. Make sure your changes are passing our test suite: `airbyte-ci connectors --name=destination-firebolt test` +2. Bump the connector version in `metadata.yaml`: increment the `dockerImageTag` value. Please follow [semantic versioning for connectors](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#semantic-versioning-for-connectors). +3. Make sure the `metadata.yaml` content is up to date. +4. Make the connector documentation and its changelog is up to date (`docs/integrations/destinations/firebolt.md`). +5. Create a Pull Request: use [our PR naming conventions](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#pull-request-title-convention). +6. Pat yourself on the back for being an awesome contributor. +7. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master. 
+
diff --git a/airbyte-integrations/connectors/destination-firebolt/bootstrap.md b/airbyte-integrations/connectors/destination-firebolt/bootstrap.md
new file mode 100644
index 0000000..dade520
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-firebolt/bootstrap.md
@@ -0,0 +1,22 @@
+# Firebolt
+
+## Overview
+
+Firebolt is a cloud data warehouse purpose-built to provide sub-second analytics performance on massive, terabyte-scale data sets.
+
+Firebolt has two main concepts: Databases, which denote the storage of data, and Engines, which describe the compute layer on top of a Database.
+
+Firebolt has three types of tables: External, Fact and Dimension. External tables represent a raw file structure in storage. Dimension tables are optimised for fetching and store data on each node in an Engine. Fact tables are similar to Dimension, but they shard the data across the nodes. The usual workload is to write source data into a set of files on S3, wrap them with an External table and write this data to a fetch-optimised Fact or Dimension table.
+
+## Connector
+
+Firebolt is a data warehouse, so the most efficient way to write data into it is in bulk. The Firebolt connector offers two ways of writing data: SQL and S3. SQL transfers data in small batches and is most useful for prototyping. S3 buffers data on Amazon S3 storage and persists the data to Firebolt at the end of execution. The latter is the most efficient way of loading data, but it requires AWS S3 access.
+
+This connector uses [firebolt-sdk](https://pypi.org/project/firebolt-sdk/), which is a [PEP-249](https://peps.python.org/pep-0249/) DB API implementation.
+The `Connection` object is used to connect to a specified Engine, which runs subsequent queries against the data stored in the Database via the `Cursor` object.
+[Pyarrow](https://pypi.org/project/pyarrow/) is used to efficiently store and upload data to S3.
+
+## Notes
+
+* Integration testing requires the user to have a running engine. Spinning up an engine can take a while, so this requirement ensures faster iteration on the connector.
+* S3 is generally the faster writing strategy and should be preferred.
\ No newline at end of file
diff --git a/airbyte-integrations/connectors/destination-firebolt/destination_firebolt/__init__.py b/airbyte-integrations/connectors/destination-firebolt/destination_firebolt/__init__.py
new file mode 100644
index 0000000..90396b0
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-firebolt/destination_firebolt/__init__.py
@@ -0,0 +1,8 @@
+#
+# Copyright (c) 2021 Airbyte, Inc., all rights reserved.
+#
+
+
+from .destination import DestinationFirebolt
+
+__all__ = ["DestinationFirebolt"]
diff --git a/airbyte-integrations/connectors/destination-firebolt/destination_firebolt/destination.py b/airbyte-integrations/connectors/destination-firebolt/destination_firebolt/destination.py
new file mode 100644
index 0000000..5b169f0
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-firebolt/destination_firebolt/destination.py
@@ -0,0 +1,128 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+# + +import json +from datetime import datetime +from logging import getLogger +from typing import Any, Dict, Iterable, Mapping, Optional +from uuid import uuid4 + +from airbyte_cdk import AirbyteLogger +from airbyte_cdk.destinations import Destination +from airbyte_cdk.models import AirbyteConnectionStatus, AirbyteMessage, ConfiguredAirbyteCatalog, DestinationSyncMode, Status, Type +from firebolt.client import DEFAULT_API_URL +from firebolt.client.auth import UsernamePassword +from firebolt.db import Connection, connect + +from .writer import create_firebolt_wirter + +logger = getLogger("airbyte") + + +def parse_config(config: json, logger: Optional[AirbyteLogger] = None) -> Dict[str, Any]: + """ + Convert dict of config values to firebolt.db.Connection arguments + :param config: json-compatible dict of settings + :param logger: AirbyteLogger instance to print logs. + :return: dictionary of firebolt.db.Connection-compatible kwargs + """ + connection_args = { + "database": config["database"], + "auth": UsernamePassword(config["username"], config["password"]), + "api_endpoint": config.get("host", DEFAULT_API_URL), + "account_name": config.get("account"), + } + # engine can be a name or a full URL of a cluster + engine = config.get("engine") + if engine: + if "." in engine: + connection_args["engine_url"] = engine + else: + connection_args["engine_name"] = engine + elif logger: + logger.info("Engine parameter was not provided. Connecting to the default engine.") + return connection_args + + +def establish_connection(config: json, logger: Optional[AirbyteLogger] = None) -> Connection: + """ + Creates a connection to Firebolt database using the parameters provided. + :param config: Json object containing db credentials. + :param logger: AirbyteLogger instance to print logs. + :return: PEP-249 compliant database Connection object. + """ + logger.debug("Connecting to Firebolt.") if logger else None + connection = connect(**parse_config(config, logger)) + logger.debug("Connection to Firebolt established.") if logger else None + return connection + + +class DestinationFirebolt(Destination): + def write( + self, config: Mapping[str, Any], configured_catalog: ConfiguredAirbyteCatalog, input_messages: Iterable[AirbyteMessage] + ) -> Iterable[AirbyteMessage]: + + """ + Reads the input stream of messages, config, and catalog to write data to the destination. + + This method returns an iterable (typically a generator of AirbyteMessages via yield) containing state messages received + in the input message stream. Outputting a state message means that every AirbyteRecordMessage which came before it has been + successfully persisted to the destination. This is used to ensure fault tolerance in the case that a sync fails before fully completing, + then the source is given the last state message output from this method as the starting point of the next sync. 
+ + :param config: dict of JSON configuration matching the configuration declared in spec.json + :param configured_catalog: The Configured Catalog describing the schema of the data being received and how it should be persisted in the + destination + :param input_messages: The stream of input messages received from the source + :return: Iterable of AirbyteStateMessages wrapped in AirbyteMessage structs + """ + streams = {s.stream.name for s in configured_catalog.streams} + + with establish_connection(config) as connection: + writer = create_firebolt_wirter(connection, config, logger) + + for configured_stream in configured_catalog.streams: + if configured_stream.destination_sync_mode == DestinationSyncMode.overwrite: + writer.delete_table(configured_stream.stream.name) + logger.info(f"Stream {configured_stream.stream.name} is wiped.") + writer.create_raw_table(configured_stream.stream.name) + + for message in input_messages: + if message.type == Type.STATE: + yield message + elif message.type == Type.RECORD: + data = message.record.data + stream = message.record.stream + # Skip unselected streams + if stream not in streams: + logger.debug(f"Stream {stream} was not present in configured streams, skipping") + continue + writer.queue_write_data(stream, str(uuid4()), datetime.now(), json.dumps(data)) + + # Flush any leftover messages + writer.flush() + + def check(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> AirbyteConnectionStatus: + """ + Tests if the input configuration can be used to successfully connect to the destination with the needed permissions + e.g: if a provided API token or password can be used to connect and write to the destination. + + :param logger: Logging object to display debug/info/error to the logs + (logs will not be accessible via airbyte UI if they are not passed to this logger) + :param config: Json object containing the configuration of this destination, content of this json is as specified in + the properties of the spec.json file + + :return: AirbyteConnectionStatus indicating a Success or Failure + """ + try: + with establish_connection(config, logger) as connection: + # We can only verify correctness of connection parameters on execution + with connection.cursor() as cursor: + cursor.execute("SELECT 1") + # Test access to the bucket, if S3 strategy is used + create_firebolt_wirter(connection, config, logger) + + return AirbyteConnectionStatus(status=Status.SUCCEEDED) + except Exception as e: + return AirbyteConnectionStatus(status=Status.FAILED, message=f"An exception occurred: {repr(e)}") diff --git a/airbyte-integrations/connectors/destination-firebolt/destination_firebolt/spec.json b/airbyte-integrations/connectors/destination-firebolt/destination_firebolt/spec.json new file mode 100644 index 0000000..a026380 --- /dev/null +++ b/airbyte-integrations/connectors/destination-firebolt/destination_firebolt/spec.json @@ -0,0 +1,109 @@ +{ + "documentationUrl": "https://docs.airbyte.com/integrations/destinations/firebolt", + "supported_destination_sync_modes": ["overwrite", "append"], + "supportsIncremental": true, + "supportsDBT": true, + "supportsNormalization": false, + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Firebolt Spec", + "type": "object", + "required": ["username", "password", "database"], + "additionalProperties": false, + "properties": { + "username": { + "type": "string", + "title": "Username", + "description": "Firebolt email address you use to login.", + "examples": 
["username@email.com"], + "order": 0 + }, + "password": { + "type": "string", + "title": "Password", + "description": "Firebolt password.", + "airbyte_secret": true, + "order": 1 + }, + "account": { + "type": "string", + "title": "Account", + "description": "Firebolt account to login." + }, + "host": { + "type": "string", + "title": "Host", + "description": "The host name of your Firebolt database.", + "examples": ["api.app.firebolt.io"] + }, + "database": { + "type": "string", + "title": "Database", + "description": "The database to connect to." + }, + "engine": { + "type": "string", + "title": "Engine", + "description": "Engine name or url to connect to." + }, + "loading_method": { + "type": "object", + "title": "Loading Method", + "description": "Loading method used to select the way data will be uploaded to Firebolt", + "oneOf": [ + { + "title": "SQL Inserts", + "additionalProperties": false, + "required": ["method"], + "properties": { + "method": { + "type": "string", + "const": "SQL" + } + } + }, + { + "title": "External Table via S3", + "additionalProperties": false, + "required": [ + "method", + "s3_bucket", + "s3_region", + "aws_key_id", + "aws_key_secret" + ], + "properties": { + "method": { + "type": "string", + "const": "S3" + }, + "s3_bucket": { + "type": "string", + "title": "S3 bucket name", + "description": "The name of the S3 bucket." + }, + "s3_region": { + "type": "string", + "title": "S3 region name", + "description": "Region name of the S3 bucket.", + "examples": ["us-east-1"] + }, + "aws_key_id": { + "type": "string", + "title": "AWS Key ID", + "airbyte_secret": true, + "description": "AWS access key granting read and write access to S3." + }, + "aws_key_secret": { + "type": "string", + "title": "AWS Key Secret", + "airbyte_secret": true, + "description": "Corresponding secret part of the AWS Key" + } + } + } + ] + } + } + } +} diff --git a/airbyte-integrations/connectors/destination-firebolt/destination_firebolt/writer.py b/airbyte-integrations/connectors/destination-firebolt/destination_firebolt/writer.py new file mode 100644 index 0000000..6935fef --- /dev/null +++ b/airbyte-integrations/connectors/destination-firebolt/destination_firebolt/writer.py @@ -0,0 +1,235 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + +import json +from collections import defaultdict +from datetime import datetime +from time import time +from uuid import uuid4 + +import pyarrow as pa +import pyarrow.parquet as pq +from airbyte_cdk import AirbyteLogger +from firebolt.db import Connection +from pyarrow import fs + + +class FireboltWriter: + """ + Base class for shared writer logic. + """ + + flush_interval = 1000 + + def __init__(self, connection: Connection) -> None: + """ + :param connection: Firebolt SDK connection class with established connection + to the databse. + """ + self.connection = connection + self._buffer = defaultdict(list) + self._values = 0 + + def delete_table(self, name: str) -> None: + """ + Delete the resulting table. + Primarily used in Overwrite strategy to clean up previous data. + + :param name: table name to delete. + """ + cursor = self.connection.cursor() + cursor.execute(f"DROP TABLE IF EXISTS _airbyte_raw_{name}") + + def create_raw_table(self, name: str): + """ + Create the resulting _airbyte_raw table. + + :param name: table name to create. 
+        """
+        query = f"""
+        CREATE FACT TABLE IF NOT EXISTS _airbyte_raw_{name} (
+            _airbyte_ab_id TEXT,
+            _airbyte_emitted_at TIMESTAMP,
+            _airbyte_data TEXT
+        )
+        PRIMARY INDEX _airbyte_ab_id
+        """
+        cursor = self.connection.cursor()
+        cursor.execute(query)
+
+    def queue_write_data(self, stream_name: str, id: str, time: datetime, record: str) -> None:
+        """
+        Queue up data in an in-memory buffer before writing it to the database.
+        When flush_interval is reached, the data is persisted.
+
+        :param stream_name: name of the stream to which the data corresponds.
+        :param id: unique identifier of this data row.
+        :param time: time of writing.
+        :param record: string representation of the JSON data payload.
+        """
+        self._buffer[stream_name].append((id, time, record))
+        self._values += 1
+        if self._values == self.flush_interval:
+            self._flush()
+
+    def _flush(self):
+        """
+        Stub for the intermediate data flush that's triggered during the
+        buffering operation.
+        """
+        raise NotImplementedError()
+
+    def flush(self):
+        """
+        Stub for the data flush at the end of the writing operation.
+        """
+        raise NotImplementedError()
+
+
+class FireboltS3Writer(FireboltWriter):
+    """
+    Data writer using the S3 strategy. Data is buffered in memory
+    before being flushed to S3 in .parquet format. At the end of
+    the operation the data is written to the Firebolt database from S3, allowing
+    greater ingestion speed.
+    """
+
+    flush_interval = 100000
+
+    def __init__(self, connection: Connection, s3_bucket: str, access_key: str, secret_key: str, s3_region: str) -> None:
+        """
+        :param connection: Firebolt SDK connection class with an established connection
+        to the database.
+        :param s3_bucket: Intermediate bucket to store the data files before writing them to Firebolt.
+        Has to be created and accessible.
+        :param access_key: AWS Access Key ID that has read/write/delete permissions on the files in the bucket.
+        :param secret_key: Corresponding AWS Secret Key.
+        :param s3_region: S3 region. Best to keep this the same as the Firebolt database region. Defaults to us-east-1.
+        """
+        super().__init__(connection)
+        self.key_id = access_key
+        self.secret_key = secret_key
+        self.s3_bucket = s3_bucket
+        self._updated_tables = set()
+        self.unique_dir = f"{int(time())}_{uuid4()}"
+        self.fs = fs.S3FileSystem(access_key=access_key, secret_key=secret_key, region=s3_region)
+
+    def _flush(self) -> None:
+        """
+        Intermediate data flush that's triggered during the
+        buffering operation. Uploads the data stored in memory to S3.
+        """
+        for table, data in self._buffer.items():
+            key_list, ts_list, payload = zip(*data)
+            upload_data = [pa.array(key_list), pa.array(ts_list), pa.array(payload)]
+            pa_table = pa.table(upload_data, names=["_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_data"])
+            pq.write_to_dataset(table=pa_table, root_path=f"{self.s3_bucket}/airbyte_output/{self.unique_dir}/{table}", filesystem=self.fs)
+        # Track which tables received new data so flush() knows what to ingest
+        self._updated_tables.update(self._buffer.keys())
+        self._buffer.clear()
+        self._values = 0
+
+    def flush(self) -> None:
+        """
+        Flush any leftover data after ingestion and write from S3 to Firebolt.
+        Intermediate data on S3 and the External Table are deleted after the write is complete.
+        """
+        self._flush()
+        for table in self._updated_tables:
+            self.create_raw_table(table)
+            self.create_external_table(table)
+            self.ingest_data(table)
+            self.cleanup(table)
+
+    def create_external_table(self, name: str) -> None:
+        """
+        Create a Firebolt External Table to interface with the files on S3.
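+        The external table is declared over the parquet files previously uploaded by
+        _flush: for a hypothetical stream named "users", the table ex_airbyte_raw_users
+        reads every *.parquet object under s3://<bucket>/airbyte_output/<unique_dir>/users/.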
+
+        :param name: Stream name from which the table name is derived.
+        """
+        query = f"""
+            CREATE EXTERNAL TABLE IF NOT EXISTS ex_airbyte_raw_{name} (
+                _airbyte_ab_id TEXT,
+                _airbyte_emitted_at TIMESTAMP,
+                _airbyte_data TEXT
+            )
+            URL = ?
+            CREDENTIALS = ( AWS_KEY_ID = ? AWS_SECRET_KEY = ? )
+            OBJECT_PATTERN = '*.parquet'
+            TYPE = (PARQUET);
+        """
+        cursor = self.connection.cursor()
+        cursor.execute(query, parameters=(f"s3://{self.s3_bucket}/airbyte_output/{self.unique_dir}/{name}", self.key_id, self.secret_key))
+
+    def ingest_data(self, name: str) -> None:
+        """
+        Write data from the External Table to the _airbyte_raw table, effectively
+        persisting the data in Firebolt.
+
+        :param name: Stream name from which the table name is derived.
+        """
+        query = f"INSERT INTO _airbyte_raw_{name} SELECT * FROM ex_airbyte_raw_{name}"
+        cursor = self.connection.cursor()
+        cursor.execute(query)
+
+    def cleanup(self, name: str) -> None:
+        """
+        Drop the intermediary External Table and wipe the S3 folder.
+
+        :param name: Stream name from which the table name is derived.
+        """
+        cursor = self.connection.cursor()
+        cursor.execute(f"DROP TABLE IF EXISTS ex_airbyte_raw_{name}")
+        self.fs.delete_dir_contents(f"{self.s3_bucket}/airbyte_output/{self.unique_dir}/{name}")
+
+
+class FireboltSQLWriter(FireboltWriter):
+    """
+    Data writer using the SQL writing strategy. Data is buffered in memory
+    and flushed using INSERT INTO SQL statements. This is a less efficient strategy,
+    better suited for testing and small data sets.
+    """
+
+    flush_interval = 1000
+
+    def __init__(self, connection: Connection) -> None:
+        """
+        :param connection: Firebolt SDK connection class with an established connection
+        to the database.
+        """
+        super().__init__(connection)
+
+    def _flush(self) -> None:
+        """
+        Intermediate data flush that's triggered during the
+        buffering operation. Writes the data stored in memory via SQL commands.
+        """
+        cursor = self.connection.cursor()
+        # Each buffered row is an (id, written_at, data) tuple matching the raw table columns
+        for table, data in self._buffer.items():
+            cursor.executemany(f"INSERT INTO _airbyte_raw_{table} VALUES (?, ?, ?)", parameters_seq=data)
+        self._buffer.clear()
+        self._values = 0
+
+    def flush(self) -> None:
+        """
+        Final data flush after all data has been written to memory.
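+
+        A minimal usage sketch (illustrative only; assumes an established Firebolt
+        connection and that create_raw_table has already been run for the hypothetical
+        "users" stream):
+
+            writer = FireboltSQLWriter(connection)
+            writer.queue_write_data("users", str(uuid4()), datetime.now(), json.dumps({"id": 1}))
+            writer.flush()  # any rows still in the buffer are INSERTed into _airbyte_raw_users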
+ """ + self._flush() + + +def create_firebolt_wirter(connection: Connection, config: json, logger: AirbyteLogger) -> FireboltWriter: + if config["loading_method"]["method"] == "S3": + logger.info("Using the S3 writing strategy") + writer = FireboltS3Writer( + connection, + config["loading_method"]["s3_bucket"], + config["loading_method"]["aws_key_id"], + config["loading_method"]["aws_key_secret"], + config["loading_method"]["s3_region"], + ) + else: + logger.info("Using the SQL writing strategy") + writer = FireboltSQLWriter(connection) + return writer diff --git a/airbyte-integrations/connectors/destination-firebolt/icon.svg b/airbyte-integrations/connectors/destination-firebolt/icon.svg new file mode 100644 index 0000000..8b161e2 --- /dev/null +++ b/airbyte-integrations/connectors/destination-firebolt/icon.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/airbyte-integrations/connectors/destination-firebolt/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/destination-firebolt/integration_tests/configured_catalog.json new file mode 100644 index 0000000..7715d5b --- /dev/null +++ b/airbyte-integrations/connectors/destination-firebolt/integration_tests/configured_catalog.json @@ -0,0 +1,38 @@ +{ + "streams": [ + { + "stream": { + "name": "airbyte_acceptance_table", + "supported_sync_modes": ["full_refresh"], + "source_defined_cursor": false, + "json_schema": { + "type": "object", + "properties": { + "column1": { + "type": "string" + }, + "column2": { + "type": "number" + }, + "column3": { + "type": "string", + "format": "date-time", + "airbyte_type": "timestamp_without_timezone" + }, + "column4": { + "type": "number" + }, + "column5": { + "type": "array", + "items": { + "type": "integer" + } + } + } + } + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + } + ] +} diff --git a/airbyte-integrations/connectors/destination-firebolt/integration_tests/integration_test.py b/airbyte-integrations/connectors/destination-firebolt/integration_tests/integration_test.py new file mode 100644 index 0000000..872db32 --- /dev/null +++ b/airbyte-integrations/connectors/destination-firebolt/integration_tests/integration_test.py @@ -0,0 +1,147 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+# + +import random +import string +from datetime import datetime +from json import dumps, load +from typing import Dict +from unittest.mock import MagicMock + +from airbyte_cdk.models import AirbyteMessage, AirbyteRecordMessage, Status, Type +from airbyte_cdk.models.airbyte_protocol import ( + AirbyteStream, + ConfiguredAirbyteCatalog, + ConfiguredAirbyteStream, + DestinationSyncMode, + SyncMode, +) +from destination_firebolt.destination import DestinationFirebolt, establish_connection +from firebolt.common.exception import FireboltError +from pytest import fixture, mark, raises + + +@fixture(scope="module") +def config() -> Dict[str, str]: + with open( + "secrets/config.json", + ) as f: + yield load(f) + + +@fixture(scope="module") +def test_table_name() -> str: + letters = string.ascii_lowercase + rnd_string = "".join(random.choice(letters) for i in range(10)) + return f"airbyte_integration_{rnd_string}" + + +@fixture +def cleanup(config: Dict[str, str], test_table_name: str): + yield + with establish_connection(config, MagicMock()) as connection: + with connection.cursor() as cursor: + cursor.execute(f"DROP TABLE IF EXISTS _airbyte_raw_{test_table_name}") + cursor.execute(f"DROP TABLE IF EXISTS ex_airbyte_raw_{test_table_name}") + + +@fixture +def table_schema() -> str: + schema = { + "type": "object", + "properties": { + "column1": {"type": ["null", "string"]}, + }, + } + return schema + + +@fixture +def configured_catalogue(test_table_name: str, table_schema: str) -> ConfiguredAirbyteCatalog: + append_stream = ConfiguredAirbyteStream( + stream=AirbyteStream(name=test_table_name, json_schema=table_schema, supported_sync_modes=[SyncMode.incremental]), + sync_mode=SyncMode.incremental, + destination_sync_mode=DestinationSyncMode.append, + ) + return ConfiguredAirbyteCatalog(streams=[append_stream]) + + +@fixture(scope="module") +def invalid_config() -> Dict[str, str]: + with open( + "integration_tests/invalid_config.json", + ) as f: + yield load(f) + + +@fixture(scope="module") +def invalid_config_s3() -> Dict[str, str]: + with open( + "integration_tests/invalid_config_s3.json", + ) as f: + yield load(f) + + +@fixture +def airbyte_message1(test_table_name: str): + return AirbyteMessage( + type=Type.RECORD, + record=AirbyteRecordMessage( + stream=test_table_name, + data={"key1": "value1", "key2": 2}, + emitted_at=int(datetime.now().timestamp()) * 1000, + ), + ) + + +@fixture +def airbyte_message2(test_table_name: str): + return AirbyteMessage( + type=Type.RECORD, + record=AirbyteRecordMessage( + stream=test_table_name, + data={"key1": "value2", "key2": 3}, + emitted_at=int(datetime.now().timestamp()) * 1000, + ), + ) + + +@mark.parametrize("config", ["invalid_config", "invalid_config_s3"]) +def test_check_fails(config, request): + destination = DestinationFirebolt() + status = destination.check(logger=MagicMock(), config=config) + assert status.status == Status.FAILED + + +def test_check_succeeds(config, request): + destination = DestinationFirebolt() + status = destination.check(logger=MagicMock(), config=config) + assert status.status == Status.SUCCEEDED + + +def test_write( + config: Dict[str, str], + configured_catalogue: ConfiguredAirbyteCatalog, + airbyte_message1: AirbyteMessage, + airbyte_message2: AirbyteMessage, + test_table_name: str, + cleanup, + request, +): + destination = DestinationFirebolt() + generator = destination.write(config, configured_catalogue, [airbyte_message1, airbyte_message2]) + result = list(generator) + assert len(result) == 0 + with 
establish_connection(config, MagicMock()) as connection: + with connection.cursor() as cursor: + cursor.execute( + f"SELECT _airbyte_ab_id, _airbyte_emitted_at, _airbyte_data FROM _airbyte_raw_{test_table_name} ORDER BY _airbyte_data" + ) + result = cursor.fetchall() + # Make sure no temporary tables present + with raises(FireboltError): + cursor.execute(f"SELECT TOP 0 * FROM ex_airbyte_raw_{test_table_name}") + assert len(result) == 2 + assert result[0][2] == dumps(airbyte_message1.record.data) + assert result[1][2] == dumps(airbyte_message2.record.data) diff --git a/airbyte-integrations/connectors/destination-firebolt/integration_tests/invalid_config.json b/airbyte-integrations/connectors/destination-firebolt/integration_tests/invalid_config.json new file mode 100644 index 0000000..f8251d5 --- /dev/null +++ b/airbyte-integrations/connectors/destination-firebolt/integration_tests/invalid_config.json @@ -0,0 +1,9 @@ +{ + "username": "xxx", + "password": "xxx", + "database": "non_existing_database_name", + "engine": "database_name_Analytics", + "loading_method": { + "method": "SQL" + } +} diff --git a/airbyte-integrations/connectors/destination-firebolt/integration_tests/invalid_config_s3.json b/airbyte-integrations/connectors/destination-firebolt/integration_tests/invalid_config_s3.json new file mode 100644 index 0000000..2ab29e8 --- /dev/null +++ b/airbyte-integrations/connectors/destination-firebolt/integration_tests/invalid_config_s3.json @@ -0,0 +1,13 @@ +{ + "username": "xxx", + "password": "xxx", + "database": "non_existing_database_name", + "engine": "database_name_Analytics", + "loading_method": { + "method": "S3", + "s3_bucket": "sample_bucket", + "s3_region": "us-east-1", + "aws_key_id": "yyy", + "aws_key_secret": "yyy" + } +} diff --git a/airbyte-integrations/connectors/destination-firebolt/integration_tests/messages.jsonl b/airbyte-integrations/connectors/destination-firebolt/integration_tests/messages.jsonl new file mode 100644 index 0000000..ab871c1 --- /dev/null +++ b/airbyte-integrations/connectors/destination-firebolt/integration_tests/messages.jsonl @@ -0,0 +1,2 @@ +{"type": "RECORD", "record": {"stream": "airbyte_acceptance_table", "data": {"column1": "my_value", "column2": 221, "column3": "2021-01-01T20:10:22", "column4": 1.214, "column5": [1,2,3]}, "emitted_at": 1626172757000}} +{"type": "RECORD", "record": {"stream": "airbyte_acceptance_table", "data": {"column1": "my_value2", "column2": 222, "column3": "2021-01-02T22:10:22", "column5": [1,2,null]}, "emitted_at": 1626172757000}} diff --git a/airbyte-integrations/connectors/destination-firebolt/main.py b/airbyte-integrations/connectors/destination-firebolt/main.py new file mode 100644 index 0000000..1b173be --- /dev/null +++ b/airbyte-integrations/connectors/destination-firebolt/main.py @@ -0,0 +1,11 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
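+#
+# Entry point sketch: run() is inherited from the Airbyte CDK Destination base class and
+# dispatches the spec/check/write commands passed on the command line to the methods
+# implemented in destination.py.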
+# + + +import sys + +from destination_firebolt import DestinationFirebolt + +if __name__ == "__main__": + DestinationFirebolt().run(sys.argv[1:]) diff --git a/airbyte-integrations/connectors/destination-firebolt/metadata.yaml b/airbyte-integrations/connectors/destination-firebolt/metadata.yaml new file mode 100644 index 0000000..bc04b2e --- /dev/null +++ b/airbyte-integrations/connectors/destination-firebolt/metadata.yaml @@ -0,0 +1,25 @@ +data: + connectorSubtype: database + connectorType: destination + definitionId: 18081484-02a5-4662-8dba-b270b582f321 + dockerImageTag: 0.1.0 + dockerRepository: airbyte/destination-firebolt + githubIssueLabel: destination-firebolt + icon: firebolt.svg + license: MIT + name: Firebolt + registries: + cloud: + enabled: false + oss: + enabled: false + releaseStage: alpha + documentationUrl: https://docs.airbyte.com/integrations/destinations/firebolt + supportsDbt: true + tags: + - language:python + ab_internal: + sl: 100 + ql: 100 + supportLevel: archived +metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/destination-firebolt/requirements.txt b/airbyte-integrations/connectors/destination-firebolt/requirements.txt new file mode 100644 index 0000000..d6e1198 --- /dev/null +++ b/airbyte-integrations/connectors/destination-firebolt/requirements.txt @@ -0,0 +1 @@ +-e . diff --git a/airbyte-integrations/connectors/destination-firebolt/setup.py b/airbyte-integrations/connectors/destination-firebolt/setup.py new file mode 100644 index 0000000..a2597d9 --- /dev/null +++ b/airbyte-integrations/connectors/destination-firebolt/setup.py @@ -0,0 +1,23 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +from setuptools import find_packages, setup + +MAIN_REQUIREMENTS = ["airbyte-cdk", "firebolt-sdk>=0.8.0", "pyarrow"] + +TEST_REQUIREMENTS = ["pytest~=6.1"] + +setup( + name="destination_firebolt", + description="Destination implementation for Firebolt.", + author="Airbyte", + author_email="contact@airbyte.io", + packages=find_packages(), + install_requires=MAIN_REQUIREMENTS, + package_data={"": ["*.json"]}, + extras_require={ + "tests": TEST_REQUIREMENTS, + }, +) diff --git a/airbyte-integrations/connectors/destination-firebolt/unit_tests/test_firebolt_destination.py b/airbyte-integrations/connectors/destination-firebolt/unit_tests/test_firebolt_destination.py new file mode 100644 index 0000000..8d70a10 --- /dev/null +++ b/airbyte-integrations/connectors/destination-firebolt/unit_tests/test_firebolt_destination.py @@ -0,0 +1,241 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+# + +from datetime import datetime +from typing import Any, Dict +from unittest.mock import MagicMock, call, patch + +from airbyte_cdk.models import ( + AirbyteMessage, + AirbyteRecordMessage, + AirbyteStream, + ConfiguredAirbyteCatalog, + ConfiguredAirbyteStream, + DestinationSyncMode, + Status, + SyncMode, + Type, +) +from destination_firebolt.destination import DestinationFirebolt, establish_connection, parse_config +from pytest import fixture + + +@fixture(params=["my_engine", "my_engine.api.firebolt.io"]) +def config(request: Any) -> Dict[str, str]: + args = { + "database": "my_database", + "username": "my_username", + "password": "my_password", + "engine": request.param, + "loading_method": { + "method": "SQL", + }, + } + return args + + +@fixture +def config_external_table() -> Dict[str, str]: + args = { + "database": "my_database", + "username": "my_username", + "password": "my_password", + "engine": "my_engine", + "loading_method": { + "method": "S3", + "s3_bucket": "my_bucket", + "s3_region": "us-east-1", + "aws_key_id": "aws_key", + "aws_key_secret": "aws_secret", + }, + } + return args + + +@fixture +def config_no_engine() -> Dict[str, str]: + args = { + "database": "my_database", + "username": "my_username", + "password": "my_password", + } + return args + + +@fixture +def logger() -> MagicMock: + return MagicMock() + + +@fixture +def configured_stream1() -> ConfiguredAirbyteStream: + return ConfiguredAirbyteStream( + stream=AirbyteStream( + name="table1", + json_schema={ + "type": "object", + "properties": {"col1": {"type": "string"}, "col2": {"type": "integer"}}, + }, + supported_sync_modes=[SyncMode.incremental], + ), + sync_mode=SyncMode.incremental, + destination_sync_mode=DestinationSyncMode.append, + ) + + +@fixture +def configured_stream2() -> ConfiguredAirbyteStream: + return ConfiguredAirbyteStream( + stream=AirbyteStream( + name="table2", + json_schema={ + "type": "object", + "properties": {"col1": {"type": "string"}, "col2": {"type": "integer"}}, + }, + supported_sync_modes=[SyncMode.incremental], + ), + sync_mode=SyncMode.incremental, + destination_sync_mode=DestinationSyncMode.append, + ) + + +@fixture +def airbyte_message1() -> AirbyteMessage: + return AirbyteMessage( + type=Type.RECORD, + record=AirbyteRecordMessage( + stream="table1", + data={"key1": "value1", "key2": 2}, + emitted_at=int(datetime.now().timestamp()) * 1000, + ), + ) + + +@fixture +def airbyte_message2() -> AirbyteMessage: + return AirbyteMessage( + type=Type.RECORD, + record=AirbyteRecordMessage( + stream="table2", + data={"key1": "value2", "key2": 3}, + emitted_at=int(datetime.now().timestamp()) * 1000, + ), + ) + + +@fixture +def airbyte_state_message() -> AirbyteMessage: + return AirbyteMessage(type=Type.STATE) + + +def test_parse_config(config: Dict[str, str]): + config["engine"] = "override_engine" + result = parse_config(config) + assert result["database"] == "my_database" + assert result["engine_name"] == "override_engine" + assert result["auth"].username == "my_username" + assert result["auth"].password == "my_password" + config["engine"] = "override_engine.api.firebolt.io" + result = parse_config(config) + assert result["engine_url"] == "override_engine.api.firebolt.io" + + +@patch("destination_firebolt.destination.connect", MagicMock()) +def test_connection(config: Dict[str, str], config_no_engine: Dict[str, str], logger: MagicMock) -> None: + establish_connection(config, logger) + logger.reset_mock() + establish_connection(config_no_engine, logger) + assert any(["default engine" in 
msg.args[0] for msg in logger.info.mock_calls]), "No message on using default engine" + # Check no log object + establish_connection(config) + + +@patch("destination_firebolt.writer.FireboltS3Writer") +@patch("destination_firebolt.destination.connect") +def test_check( + mock_connection: MagicMock, mock_writer: MagicMock, config: Dict[str, str], config_external_table: Dict[str, str], logger: MagicMock +): + destination = DestinationFirebolt() + status = destination.check(logger, config) + assert status.status == Status.SUCCEEDED + mock_writer.assert_not_called() + status = destination.check(logger, config_external_table) + assert status.status == Status.SUCCEEDED + mock_writer.assert_called_once() + mock_connection().__enter__().cursor().__enter__().execute.side_effect = Exception("my exception") + status = destination.check(logger, config) + assert status.status == Status.FAILED + + +@patch("destination_firebolt.writer.FireboltSQLWriter") +@patch("destination_firebolt.destination.establish_connection") +def test_sql_write_append( + mock_connection: MagicMock, + mock_writer: MagicMock, + config: Dict[str, str], + configured_stream1: ConfiguredAirbyteStream, + configured_stream2: ConfiguredAirbyteStream, + airbyte_message1: AirbyteMessage, + airbyte_message2: AirbyteMessage, + airbyte_state_message: AirbyteMessage, +) -> None: + catalog = ConfiguredAirbyteCatalog(streams=[configured_stream1, configured_stream2]) + + destination = DestinationFirebolt() + result = destination.write(config, catalog, [airbyte_message1, airbyte_state_message, airbyte_message2]) + + assert list(result) == [airbyte_state_message] + mock_writer.return_value.delete_table.assert_not_called() + mock_writer.return_value.create_raw_table.mock_calls = [call(mock_connection, "table1"), call(mock_connection, "table2")] + assert len(mock_writer.return_value.queue_write_data.mock_calls) == 2 + mock_writer.return_value.flush.assert_called_once() + + +@patch("destination_firebolt.writer.FireboltS3Writer") +@patch("destination_firebolt.writer.FireboltSQLWriter") +@patch("destination_firebolt.destination.establish_connection") +def test_sql_write_overwrite( + mock_connection: MagicMock, + mock_writer: MagicMock, + mock_s3_writer: MagicMock, + config: Dict[str, str], + configured_stream1: ConfiguredAirbyteStream, + configured_stream2: ConfiguredAirbyteStream, + airbyte_message1: AirbyteMessage, + airbyte_message2: AirbyteMessage, + airbyte_state_message: AirbyteMessage, +): + # Overwrite triggers a delete + configured_stream1.destination_sync_mode = DestinationSyncMode.overwrite + catalog = ConfiguredAirbyteCatalog(streams=[configured_stream1, configured_stream2]) + + destination = DestinationFirebolt() + result = destination.write(config, catalog, [airbyte_message1, airbyte_state_message, airbyte_message2]) + + mock_s3_writer.assert_not_called() + assert list(result) == [airbyte_state_message] + mock_writer.return_value.delete_table.assert_called_once_with("table1") + mock_writer.return_value.create_raw_table.mock_calls = [call(mock_connection, "table1"), call(mock_connection, "table2")] + + +@patch("destination_firebolt.writer.FireboltS3Writer") +@patch("destination_firebolt.writer.FireboltSQLWriter") +@patch("destination_firebolt.destination.establish_connection", MagicMock()) +def test_s3_write( + mock_sql_writer: MagicMock, + mock_s3_writer: MagicMock, + config_external_table: Dict[str, str], + configured_stream1: ConfiguredAirbyteStream, + configured_stream2: ConfiguredAirbyteStream, + airbyte_message1: AirbyteMessage, + 
airbyte_message2: AirbyteMessage, + airbyte_state_message: AirbyteMessage, +): + catalog = ConfiguredAirbyteCatalog(streams=[configured_stream1, configured_stream2]) + + destination = DestinationFirebolt() + result = destination.write(config_external_table, catalog, [airbyte_message1, airbyte_state_message, airbyte_message2]) + assert list(result) == [airbyte_state_message] + mock_sql_writer.assert_not_called() + mock_s3_writer.assert_called_once() diff --git a/airbyte-integrations/connectors/destination-firebolt/unit_tests/test_writer.py b/airbyte-integrations/connectors/destination-firebolt/unit_tests/test_writer.py new file mode 100644 index 0000000..6ca5b69 --- /dev/null +++ b/airbyte-integrations/connectors/destination-firebolt/unit_tests/test_writer.py @@ -0,0 +1,156 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + +from typing import Any, Union +from unittest.mock import ANY, MagicMock, call, patch + +from destination_firebolt.writer import FireboltS3Writer, FireboltSQLWriter +from pytest import fixture, mark + + +@fixture +def connection() -> MagicMock: + return MagicMock() + + +@fixture +def sql_writer(connection: MagicMock) -> FireboltSQLWriter: + return FireboltSQLWriter(connection) + + +@fixture +@patch("destination_firebolt.writer.time", MagicMock(return_value=111)) +@patch("destination_firebolt.writer.uuid4", MagicMock(return_value="dummy-uuid")) +def s3_writer(connection: MagicMock) -> FireboltS3Writer: + # Make sure S3FileSystem mock is reset each time + with patch("destination_firebolt.writer.fs.S3FileSystem", MagicMock()): + return FireboltS3Writer(connection, "dummy_bucket", "access_key", "secret_key", "us-east-1") + + +def test_sql_default(sql_writer: FireboltSQLWriter) -> None: + assert len(sql_writer._buffer) == 0 + assert sql_writer.flush_interval == 1000 + + +@mark.parametrize("writer", ["sql_writer", "s3_writer"]) +def test_sql_create(connection: MagicMock, writer: Union[FireboltSQLWriter, FireboltS3Writer], request: Any) -> None: + writer = request.getfixturevalue(writer) + expected_query = """ + CREATE FACT TABLE IF NOT EXISTS _airbyte_raw_dummy ( + _airbyte_ab_id TEXT, + _airbyte_emitted_at TIMESTAMP, + _airbyte_data TEXT + ) + PRIMARY INDEX _airbyte_ab_id + """ + writer.create_raw_table("dummy") + connection.cursor.return_value.execute.assert_called_once_with(expected_query) + + +def test_data_buffering(sql_writer: FireboltSQLWriter) -> None: + sql_writer.queue_write_data("dummy", "id1", 20200101, '{"key": "value"}') + sql_writer._buffer["dummy"][0] == ("id1", 20200101, '{"key": "value"}') + assert len(sql_writer._buffer["dummy"]) == 1 + assert len(sql_writer._buffer.keys()) == 1 + sql_writer.queue_write_data("dummy", "id2", 20200102, '{"key2": "value2"}') + sql_writer._buffer["dummy"][0] == ("id2", 20200102, '{"key2": "value2"}') + assert len(sql_writer._buffer["dummy"]) == 2 + assert len(sql_writer._buffer.keys()) == 1 + sql_writer.queue_write_data("dummy2", "id3", 20200103, '{"key3": "value3"}') + sql_writer._buffer["dummy"][0] == ("id3", 20200103, '{"key3": "value3"}') + assert len(sql_writer._buffer["dummy"]) == 2 + assert len(sql_writer._buffer["dummy2"]) == 1 + assert len(sql_writer._buffer.keys()) == 2 + + +def test_data_auto_flush_one_table(connection: MagicMock, sql_writer: FireboltSQLWriter) -> None: + sql_writer.flush_interval = 2 + sql_writer.queue_write_data("dummy", "id1", 20200101, '{"key": "value"}') + connection.cursor.return_value.executemany.assert_not_called() + assert sql_writer._values == 1 + 
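+    # Queuing a second row reaches flush_interval (lowered to 2 above), so the buffered
+    # rows are written via executemany and the buffer/counter are reset.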
sql_writer.queue_write_data("dummy", "id1", 20200101, '{"key": "value"}') + connection.cursor.return_value.executemany.assert_called_once() + assert len(sql_writer._buffer.keys()) == 0 + assert sql_writer._values == 0 + sql_writer.queue_write_data("dummy", "id1", 20200101, '{"key": "value"}') + assert len(sql_writer._buffer.keys()) == 1 + + +def test_data_auto_flush_multi_tables(connection: MagicMock, sql_writer: FireboltSQLWriter) -> None: + sql_writer.flush_interval = 2 + sql_writer.queue_write_data("dummy", "id1", 20200101, '{"key": "value"}') + connection.cursor.return_value.executemany.assert_not_called() + assert sql_writer._values == 1 + sql_writer.queue_write_data("dummy2", "id1", 20200101, '{"key": "value"}') + assert len(connection.cursor.return_value.executemany.mock_calls) == 2 + assert len(sql_writer._buffer.keys()) == 0 + assert sql_writer._values == 0 + + +def test_s3_default(s3_writer: FireboltS3Writer) -> None: + assert s3_writer.flush_interval == 100000 + assert s3_writer._values == 0 + assert len(s3_writer._buffer.keys()) == 0 + + +def test_s3_delete_tables(connection: MagicMock, s3_writer: FireboltS3Writer) -> None: + expected_sql = "DROP TABLE IF EXISTS _airbyte_raw_dummy" + s3_writer.delete_table("dummy") + connection.cursor.return_value.execute.assert_called_once_with(expected_sql) + + +@patch("pyarrow.parquet.write_to_dataset") +def test_s3_data_auto_flush_one_table(mock_write: MagicMock, s3_writer: FireboltS3Writer) -> None: + s3_writer.flush_interval = 2 + s3_writer.queue_write_data("dummy", "id1", 20200101, '{"key": "value"}') + mock_write.assert_not_called() + assert s3_writer._values == 1 + s3_writer.queue_write_data("dummy", "id1", 20200101, '{"key": "value"}') + mock_write.assert_called_once_with(table=ANY, root_path="dummy_bucket/airbyte_output/111_dummy-uuid/dummy", filesystem=s3_writer.fs) + assert len(s3_writer._buffer.keys()) == 0 + assert s3_writer._values == 0 + assert s3_writer._updated_tables == set(["dummy"]) + mock_write.reset_mock() + s3_writer.queue_write_data("dummy", "id1", 20200101, '{"key": "value"}') + mock_write.assert_not_called() + assert len(s3_writer._buffer.keys()) == 1 + assert s3_writer._updated_tables == set(["dummy"]) + + +@patch("pyarrow.parquet.write_to_dataset") +def test_s3_data_auto_flush_multi_tables(mock_write: MagicMock, s3_writer: FireboltS3Writer) -> None: + s3_writer.flush_interval = 2 + s3_writer.queue_write_data("dummy", "id1", 20200101, '{"key": "value"}') + mock_write.assert_not_called() + assert s3_writer._values == 1 + s3_writer.queue_write_data("dummy2", "id1", 20200101, '{"key": "value"}') + assert mock_write.mock_calls == [ + call(table=ANY, root_path="dummy_bucket/airbyte_output/111_dummy-uuid/dummy", filesystem=s3_writer.fs), + call(table=ANY, root_path="dummy_bucket/airbyte_output/111_dummy-uuid/dummy2", filesystem=s3_writer.fs), + ] + assert len(s3_writer._buffer.keys()) == 0 + assert s3_writer._values == 0 + assert s3_writer._updated_tables == set(["dummy", "dummy2"]) + + +def test_s3_final_flush(connection: MagicMock, s3_writer: FireboltS3Writer) -> None: + s3_writer._updated_tables = set(["dummy", "dummy2"]) + s3_writer.flush() + assert len(connection.cursor.return_value.execute.mock_calls) == 8 + expected_url1 = "s3://dummy_bucket/airbyte_output/111_dummy-uuid/dummy" + expected_url2 = "s3://dummy_bucket/airbyte_output/111_dummy-uuid/dummy2" + connection.cursor.return_value.execute.assert_any_call(ANY, parameters=(expected_url1, "access_key", "secret_key")) + 
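+    # The eight execute() calls asserted above come from two streams times four statements
+    # each (create raw table, create external table, INSERT ... SELECT, DROP external table).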
connection.cursor.return_value.execute.assert_any_call(ANY, parameters=(expected_url2, "access_key", "secret_key")) + expected_query1 = "INSERT INTO _airbyte_raw_dummy SELECT * FROM ex_airbyte_raw_dummy" + expected_query2 = "INSERT INTO _airbyte_raw_dummy2 SELECT * FROM ex_airbyte_raw_dummy2" + connection.cursor.return_value.execute.assert_any_call(expected_query1) + connection.cursor.return_value.execute.assert_any_call(expected_query2) + + +def test_s3_cleanup(connection: MagicMock, s3_writer: FireboltS3Writer) -> None: + expected_sql = "DROP TABLE IF EXISTS ex_airbyte_raw_my_table" + bucket_path = "dummy_bucket/airbyte_output/111_dummy-uuid/my_table" + s3_writer.cleanup("my_table") + connection.cursor.return_value.execute.assert_called_once_with(expected_sql) + s3_writer.fs.delete_dir_contents.assert_called_once_with(bucket_path) diff --git a/airbyte-integrations/connectors/destination-keen/build.gradle b/airbyte-integrations/connectors/destination-keen/build.gradle new file mode 100644 index 0000000..777118d --- /dev/null +++ b/airbyte-integrations/connectors/destination-keen/build.gradle @@ -0,0 +1,33 @@ +plugins { + id 'application' + id 'airbyte-java-connector' +} + +airbyteJavaConnector { + cdkVersionRequired = '0.2.0' + features = ['db-destinations'] + useLocalCdk = false +} + +//remove once upgrading the CDK version to 0.4.x or later +java { + compileJava { + options.compilerArgs.remove("-Werror") + } +} + +airbyteJavaConnector.addCdkDependencies() + +application { + mainClass = 'io.airbyte.integrations.destination.keen.KeenDestination' + applicationDefaultJvmArgs = ['-XX:+ExitOnOutOfMemoryError', '-XX:MaxRAMPercentage=75.0'] +} + +dependencies { + + implementation 'org.apache.kafka:kafka-clients:2.8.0' + implementation 'com.joestelmach:natty:0.11' + + // TODO: remove this dependency + implementation libs.google.cloud.storage +} diff --git a/airbyte-integrations/connectors/destination-keen/icon.svg b/airbyte-integrations/connectors/destination-keen/icon.svg new file mode 100644 index 0000000..c092710 --- /dev/null +++ b/airbyte-integrations/connectors/destination-keen/icon.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/airbyte-integrations/connectors/destination-keen/metadata.yaml b/airbyte-integrations/connectors/destination-keen/metadata.yaml new file mode 100644 index 0000000..3eade61 --- /dev/null +++ b/airbyte-integrations/connectors/destination-keen/metadata.yaml @@ -0,0 +1,24 @@ +data: + connectorSubtype: api + connectorType: destination + definitionId: 81740ce8-d764-4ea7-94df-16bb41de36ae + dockerImageTag: 0.2.4 + dockerRepository: airbyte/destination-keen + githubIssueLabel: destination-keen + icon: chargify.svg + license: MIT + name: Chargify (Keen) + registries: + cloud: + enabled: false + oss: + enabled: false + releaseStage: alpha + documentationUrl: https://docs.airbyte.com/integrations/destinations/keen + tags: + - language:java + ab_internal: + sl: 100 + ql: 100 + supportLevel: archived +metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/destination-keen/src/main/java/io/airbyte/integrations/destination/keen/KeenCharactersStripper.java b/airbyte-integrations/connectors/destination-keen/src/main/java/io/airbyte/integrations/destination/keen/KeenCharactersStripper.java new file mode 100644 index 0000000..6886629 --- /dev/null +++ b/airbyte-integrations/connectors/destination-keen/src/main/java/io/airbyte/integrations/destination/keen/KeenCharactersStripper.java @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights 
reserved. + */ + +package io.airbyte.integrations.destination.keen; + +import org.apache.commons.lang3.StringUtils; + +public class KeenCharactersStripper { + + // Keen collection names can't contain some special characters like non ascii accented characters + // while Kafka Topic names can't contain some other set of special characters, with except for -._ + // and whitespace characters + public static String stripSpecialCharactersFromStreamName(final String streamName) { + return StringUtils.stripAccents(streamName).replaceAll("[^A-Za-z0-9 -._]", ""); + } + +} diff --git a/airbyte-integrations/connectors/destination-keen/src/main/java/io/airbyte/integrations/destination/keen/KeenDestination.java b/airbyte-integrations/connectors/destination-keen/src/main/java/io/airbyte/integrations/destination/keen/KeenDestination.java new file mode 100644 index 0000000..6e9f94d --- /dev/null +++ b/airbyte-integrations/connectors/destination-keen/src/main/java/io/airbyte/integrations/destination/keen/KeenDestination.java @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.keen; + +import static org.apache.kafka.clients.CommonClientConfigs.SECURITY_PROTOCOL_CONFIG; +import static org.apache.kafka.clients.producer.ProducerConfig.ACKS_CONFIG; +import static org.apache.kafka.clients.producer.ProducerConfig.BOOTSTRAP_SERVERS_CONFIG; +import static org.apache.kafka.clients.producer.ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG; +import static org.apache.kafka.clients.producer.ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG; +import static org.apache.kafka.common.config.SaslConfigs.SASL_JAAS_CONFIG; +import static org.apache.kafka.common.config.SaslConfigs.SASL_MECHANISM; +import static org.apache.kafka.common.security.auth.SecurityProtocol.SASL_SSL; +import static org.apache.kafka.common.security.plain.internals.PlainSaslServer.PLAIN_MECHANISM; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.cdk.integrations.BaseConnector; +import io.airbyte.cdk.integrations.base.AirbyteMessageConsumer; +import io.airbyte.cdk.integrations.base.Destination; +import io.airbyte.cdk.integrations.base.IntegrationRunner; +import io.airbyte.protocol.models.v0.AirbyteConnectionStatus; +import io.airbyte.protocol.models.v0.AirbyteConnectionStatus.Status; +import io.airbyte.protocol.models.v0.AirbyteMessage; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import java.util.Properties; +import java.util.function.Consumer; +import org.apache.kafka.clients.producer.KafkaProducer; +import org.apache.kafka.common.serialization.StringSerializer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class KeenDestination extends BaseConnector implements Destination { + + private static final Logger LOGGER = LoggerFactory.getLogger(KeenDestination.class); + private static final String KAFKA_BROKER = "b1.kafka-in.keen.io:9092,b2.kafka-in.keen.io:9092,b3.kafka-in.keen.io:9092"; + + static final String KEEN_BASE_API_PATH = "https://api.keen.io/3.0"; + static final String CONFIG_PROJECT_ID = "project_id"; + static final String CONFIG_API_KEY = "api_key"; + static final String INFER_TIMESTAMP = "infer_timestamp"; + + @Override + public AirbyteConnectionStatus check(final JsonNode config) { + try { + final String projectId = config.get(CONFIG_PROJECT_ID).textValue(); + final String apiKey = config.get(CONFIG_API_KEY).textValue(); + final KafkaProducer producer = KafkaProducerFactory.create(projectId, apiKey); + + // throws 
an AuthenticationException if authentication fails + producer.partitionsFor("ANYTHING"); + + return new AirbyteConnectionStatus().withStatus(Status.SUCCEEDED); + } catch (final Exception e) { + return new AirbyteConnectionStatus().withStatus(Status.FAILED); + } + } + + @Override + public AirbyteMessageConsumer getConsumer(final JsonNode config, + final ConfiguredAirbyteCatalog catalog, + final Consumer outputRecordCollector) + throws Exception { + return new KeenRecordsConsumer(config, catalog, outputRecordCollector); + } + + public static void main(final String[] args) throws Exception { + final Destination destination = new KeenDestination(); + LOGGER.info("starting destination: {}", KeenDestination.class); + new IntegrationRunner(destination).run(args); + LOGGER.info("completed destination: {}", KeenDestination.class); + } + + public static class KafkaProducerFactory { + + public static KafkaProducer create(final String projectId, final String apiKey) { + final String jaasConfig = String.format("org.apache.kafka.common.security.plain.PlainLoginModule " + + "required username=\"%s\" password=\"%s\";", projectId, apiKey); + + final Properties props = new Properties(); + props.put(BOOTSTRAP_SERVERS_CONFIG, KAFKA_BROKER); + props.put(SECURITY_PROTOCOL_CONFIG, SASL_SSL.name()); + props.put(SASL_MECHANISM, PLAIN_MECHANISM); + props.put(SASL_JAAS_CONFIG, jaasConfig); + props.put(ACKS_CONFIG, "all"); + props.put(KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName()); + props.put(VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName()); + return new KafkaProducer<>(props); + } + + } + +} diff --git a/airbyte-integrations/connectors/destination-keen/src/main/java/io/airbyte/integrations/destination/keen/KeenHttpClient.java b/airbyte-integrations/connectors/destination-keen/src/main/java/io/airbyte/integrations/destination/keen/KeenHttpClient.java new file mode 100644 index 0000000..f94f663 --- /dev/null +++ b/airbyte-integrations/connectors/destination-keen/src/main/java/io/airbyte/integrations/destination/keen/KeenHttpClient.java @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
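+ *
+ * Thin helper around the Keen REST API: eraseStream() deletes a collection (used for
+ * overwrite sync modes and test teardown) and extract() pulls events back out for
+ * verification in the integration tests.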
+ */ + +package io.airbyte.integrations.destination.keen; + +import static io.airbyte.integrations.destination.keen.KeenDestination.KEEN_BASE_API_PATH; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ArrayNode; +import java.io.IOException; +import java.net.URI; +import java.net.http.HttpClient; +import java.net.http.HttpRequest; +import java.net.http.HttpResponse; +import java.net.http.HttpResponse.BodyHandlers; +import java.time.Duration; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class KeenHttpClient { + + private static final Logger LOGGER = LoggerFactory.getLogger(KeenHttpClient.class); + private static final String keenBaseApiPath = "https://api.keen.io/3.0"; + private static final int MINUTE_MILLIS = 1000 * 60; + final HttpClient httpClient = HttpClient.newHttpClient(); + final ObjectMapper objectMapper = new ObjectMapper(); + + public void eraseStream(final String streamToDelete, final String projectId, final String apiKey) + throws IOException, InterruptedException { + eraseStream(streamToDelete, projectId, apiKey, false); + } + + public void eraseStream(final String streamToDelete, final String projectId, final String apiKey, final boolean retried) + throws IOException, InterruptedException { + + final URI deleteUri = URI.create(String.format( + KEEN_BASE_API_PATH + "/projects/%s/events/%s", + projectId, streamToDelete)); + + final HttpRequest request = HttpRequest.newBuilder() + .uri(deleteUri) + .timeout(Duration.ofSeconds(30)) + .header("Authorization", apiKey) + .header("Content-Type", "application/json") + .DELETE() + .build(); + + final HttpResponse response = httpClient.send(request, BodyHandlers.ofString()); + + if (response.statusCode() != 204) { + if (response.statusCode() == 429 && !retried) { + LOGGER.info("Deletes limit exceeded. Sleeping 60 seconds."); + Thread.sleep(MINUTE_MILLIS); + eraseStream(streamToDelete, projectId, apiKey, true); + } else { + throw new IllegalStateException(String.format("Could not erase data from stream designed for overriding: " + + "%s. 
Error message: %s", streamToDelete, response.body())); + } + } + } + + public ArrayNode extract(final String streamName, final String projectId, final String apiKey) + throws IOException, InterruptedException { + final URI extractionUri = URI.create(String.format( + keenBaseApiPath + "/projects/%s/queries/extraction" + + "?api_key=%s&timeframe=this_7_years&event_collection=%s", + projectId, apiKey, streamName)); + + final HttpRequest request = HttpRequest.newBuilder() + .uri(extractionUri) + .timeout(Duration.ofSeconds(30)) + .header("Content-Type", "application/json") + .build(); + + final HttpResponse response = httpClient.send(request, HttpResponse.BodyHandlers.ofString()); + + if (response.statusCode() != 200) { + throw new IllegalStateException("Server did not return successful response: " + response.body()); + } + + return (ArrayNode) objectMapper.readTree(response.body()).get("result"); + } + +} diff --git a/airbyte-integrations/connectors/destination-keen/src/main/java/io/airbyte/integrations/destination/keen/KeenRecordsConsumer.java b/airbyte-integrations/connectors/destination-keen/src/main/java/io/airbyte/integrations/destination/keen/KeenRecordsConsumer.java new file mode 100644 index 0000000..62fef10 --- /dev/null +++ b/airbyte-integrations/connectors/destination-keen/src/main/java/io/airbyte/integrations/destination/keen/KeenRecordsConsumer.java @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.keen; + +import static io.airbyte.integrations.destination.keen.KeenDestination.CONFIG_API_KEY; +import static io.airbyte.integrations.destination.keen.KeenDestination.CONFIG_PROJECT_ID; +import static io.airbyte.integrations.destination.keen.KeenDestination.INFER_TIMESTAMP; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.cdk.integrations.base.FailureTrackingAirbyteMessageConsumer; +import io.airbyte.commons.json.Jsons; +import io.airbyte.protocol.models.v0.AirbyteMessage; +import io.airbyte.protocol.models.v0.AirbyteMessage.Type; +import io.airbyte.protocol.models.v0.AirbyteRecordMessage; +import io.airbyte.protocol.models.v0.AirbyteStream; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream; +import io.airbyte.protocol.models.v0.DestinationSyncMode; +import java.io.IOException; +import java.util.List; +import java.util.Optional; +import java.util.Set; +import java.util.function.Consumer; +import java.util.stream.Collectors; +import org.apache.kafka.clients.producer.KafkaProducer; +import org.apache.kafka.clients.producer.ProducerRecord; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class KeenRecordsConsumer extends FailureTrackingAirbyteMessageConsumer { + + private static final Logger LOGGER = LoggerFactory.getLogger(KeenRecordsConsumer.class); + + private final JsonNode config; + private final ConfiguredAirbyteCatalog catalog; + private final Consumer outputRecordCollector; + + private KeenTimestampService timestampService; + private String projectId; + private String apiKey; + private KafkaProducer kafkaProducer; + private Set streamNames; + + public KeenRecordsConsumer(final JsonNode config, + final ConfiguredAirbyteCatalog catalog, + final Consumer outputRecordCollector) { + this.config = config; + this.catalog = catalog; + this.outputRecordCollector = outputRecordCollector; + this.kafkaProducer = null; + this.streamNames = Set.of(); + LOGGER.info("initializing consumer."); + } + 
+ @Override + protected void startTracked() throws IOException, InterruptedException { + projectId = config.get(CONFIG_PROJECT_ID).textValue(); + apiKey = config.get(CONFIG_API_KEY).textValue(); + final boolean timestampInferenceEnabled = Optional.ofNullable(config.get(INFER_TIMESTAMP)) + .map(JsonNode::booleanValue) + .orElse(true); + this.kafkaProducer = KeenDestination.KafkaProducerFactory.create(projectId, apiKey); + this.streamNames = getStrippedStreamNames(); + this.timestampService = new KeenTimestampService(this.catalog, timestampInferenceEnabled); + eraseOverwriteStreams(); + } + + @Override + protected void acceptTracked(final AirbyteMessage msg) { + if (msg.getType() == Type.STATE) { + outputRecordCollector.accept(msg); + return; + } else if (msg.getType() != Type.RECORD) { + return; + } + + final String streamName = getStreamName(msg.getRecord()); + final JsonNode data = this.timestampService.injectTimestamp(msg.getRecord()); + + kafkaProducer.send(new ProducerRecord<>(streamName, data.toString())); + } + + private Set getStrippedStreamNames() { + return catalog.getStreams() + .stream() + .map(ConfiguredAirbyteStream::getStream) + .map(AirbyteStream::getName) + .map(KeenCharactersStripper::stripSpecialCharactersFromStreamName) + .collect(Collectors.toSet()); + } + + private void eraseOverwriteStreams() throws IOException, InterruptedException { + final KeenHttpClient keenHttpClient = new KeenHttpClient(); + LOGGER.info("erasing streams with override options selected."); + + final List streamsToDelete = this.catalog.getStreams().stream() + .filter(stream -> stream.getDestinationSyncMode() == DestinationSyncMode.OVERWRITE) + .map(stream -> KeenCharactersStripper.stripSpecialCharactersFromStreamName(stream.getStream().getName())) + .collect(Collectors.toList()); + + for (final String streamToDelete : streamsToDelete) { + LOGGER.info("erasing stream " + streamToDelete); + keenHttpClient.eraseStream(streamToDelete, projectId, apiKey); + } + } + + private String getStreamName(final AirbyteRecordMessage recordMessage) { + String streamName = recordMessage.getStream(); + if (streamNames.contains(streamName)) { + return streamName; + } + streamName = KeenCharactersStripper.stripSpecialCharactersFromStreamName(streamName); + if (!streamNames.contains(streamName)) { + throw new IllegalArgumentException( + String.format( + "Message contained record from a stream that was not in the catalog. \ncatalog: %s , \nmessage: %s", + Jsons.serialize(catalog), Jsons.serialize(recordMessage))); + } + return streamName; + } + + @Override + protected void close(final boolean hasFailed) { + kafkaProducer.flush(); + kafkaProducer.close(); + } + +} diff --git a/airbyte-integrations/connectors/destination-keen/src/main/java/io/airbyte/integrations/destination/keen/KeenTimestampService.java b/airbyte-integrations/connectors/destination-keen/src/main/java/io/airbyte/integrations/destination/keen/KeenTimestampService.java new file mode 100644 index 0000000..43dfb12 --- /dev/null +++ b/airbyte-integrations/connectors/destination-keen/src/main/java/io/airbyte/integrations/destination/keen/KeenTimestampService.java @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.keen; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.JsonNodeFactory; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.joestelmach.natty.Parser; +import io.airbyte.protocol.models.v0.AirbyteRecordMessage; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import java.time.Instant; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import org.apache.commons.lang3.tuple.Pair; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * This class is used for timestamp inference. Keen leverages use of time-related data for it's + * analytics, so it's important to have timestamp values for historical data if possible. If stream + * contains cursor field, then its value is used as a timestamp, if parsing it is possible. + */ +public class KeenTimestampService { + + private static final Logger LOGGER = LoggerFactory.getLogger(KeenRecordsConsumer.class); + + private static final long SECONDS_FROM_EPOCH_THRESHOLD = 1_000_000_000L; + + private static final long MILLIS_FROM_EPOCH_THRESHOLD = 10_000_000_000L; + + // Map containing stream names paired with their cursor fields + private Map> streamCursorFields; + private final Parser parser; + private final boolean timestampInferenceEnabled; + + public KeenTimestampService(final ConfiguredAirbyteCatalog catalog, final boolean timestampInferenceEnabled) { + this.streamCursorFields = new HashMap<>(); + this.parser = new Parser(); + this.timestampInferenceEnabled = timestampInferenceEnabled; + + if (timestampInferenceEnabled) { + LOGGER.info("Initializing KeenTimestampService, finding cursor fields."); + streamCursorFields = catalog.getStreams() + .stream() + .filter(stream -> stream.getCursorField().size() > 0) + .map(s -> Pair.of(s.getStream().getName(), s.getCursorField())) + .collect(Collectors.toMap(Pair::getKey, Pair::getValue)); + } + } + + /** + * Tries to inject keen.timestamp field to the given message data. If the stream contains cursor + * field, it's value is tried to be parsed to timestamp. If this procedure fails, stream is removed + * from timestamp-parsable stream map, so parsing is not tried for future messages in the same + * stream. If parsing succeeds, keen.timestamp field is put as a JSON node to the message data and + * whole data is returned. Otherwise, keen.timestamp is set to emittedAt value + * + * @param message AirbyteRecordMessage containing record data + * @return Record data together with keen.timestamp field + */ + public JsonNode injectTimestamp(final AirbyteRecordMessage message) { + final String streamName = message.getStream(); + final List cursorField = streamCursorFields.get(streamName); + final JsonNode data = message.getData(); + if (timestampInferenceEnabled && cursorField != null) { + try { + final String timestamp = parseTimestamp(cursorField, data); + injectTimestamp(data, timestamp); + } catch (final Exception e) { + // If parsing of timestamp has failed, remove stream from timestamp-parsable stream map, + // so it won't be parsed for future messages. 
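+        // Fall back to the record's emittedAt value so keen.timestamp is always present.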
+ LOGGER.info("Unable to parse cursor field: {} into a keen.timestamp", cursorField); + streamCursorFields.remove(streamName); + injectTimestamp(data, Instant.ofEpochMilli(message.getEmittedAt()).toString()); + } + } else { + injectTimestamp(data, Instant.ofEpochMilli(message.getEmittedAt()).toString()); + } + return data; + } + + private void injectTimestamp(final JsonNode data, final String timestamp) { + final ObjectNode root = ((ObjectNode) data); + root.set("keen", JsonNodeFactory.instance.objectNode().put("timestamp", timestamp)); + } + + private String parseTimestamp(final List cursorField, final JsonNode data) { + final JsonNode timestamp = getNestedNode(data, cursorField); + final long numberTimestamp = timestamp.asLong(); + // if cursor value is below given threshold, assume that it's not epoch timestamp but ordered id + if (numberTimestamp >= SECONDS_FROM_EPOCH_THRESHOLD) { + return dateFromNumber(numberTimestamp); + } + // if timestamp is 0, then parsing it to long failed - let's try with String now + if (numberTimestamp == 0) { + return parser + .parse(timestamp.asText()) + .get(0).getDates() + .get(0) + .toInstant() + .toString(); + } + throw new IllegalStateException(); + } + + private String dateFromNumber(final Long timestamp) { + // if cursor value is above given threshold, then assume that it's Unix timestamp in milliseconds + if (timestamp > MILLIS_FROM_EPOCH_THRESHOLD) { + return Instant.ofEpochMilli(timestamp).toString(); + } + return Instant.ofEpochSecond(timestamp).toString(); + } + + private static JsonNode getNestedNode(final JsonNode data, final List fieldNames) { + return fieldNames.stream().reduce(data, JsonNode::get, (first, second) -> second); + } + + public Map> getStreamCursorFields() { + return streamCursorFields; + } + +} diff --git a/airbyte-integrations/connectors/destination-keen/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-keen/src/main/resources/spec.json new file mode 100644 index 0000000..084eb3f --- /dev/null +++ b/airbyte-integrations/connectors/destination-keen/src/main/resources/spec.json @@ -0,0 +1,35 @@ +{ + "documentationUrl": "https://docs.airbyte.com/integrations/destinations/keen", + "supportsIncremental": true, + "supportsNormalization": false, + "supportsDBT": false, + "supported_destination_sync_modes": ["overwrite", "append"], + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Keen Spec", + "type": "object", + "required": ["project_id", "api_key"], + "additionalProperties": false, + "properties": { + "project_id": { + "description": "To get Keen Project ID, navigate to the Access tab from the left-hand, side panel and check the Project Details section.", + "title": "Project ID", + "type": "string", + "examples": ["58b4acc22ba938934e888322e"] + }, + "api_key": { + "title": "API Key", + "description": "To get Keen Master API Key, navigate to the Access tab from the left-hand, side panel and check the Project Details section.", + "type": "string", + "examples": ["ABCDEFGHIJKLMNOPRSTUWXYZ"], + "airbyte_secret": true + }, + "infer_timestamp": { + "title": "Infer Timestamp", + "description": "Allow connector to guess keen.timestamp value based on the streamed data.", + "type": "boolean", + "default": true + } + } + } +} diff --git a/airbyte-integrations/connectors/destination-keen/src/test-integration/java/io/airbyte/integrations/destination/keen/KeenDestinationTest.java 
b/airbyte-integrations/connectors/destination-keen/src/test-integration/java/io/airbyte/integrations/destination/keen/KeenDestinationTest.java new file mode 100644 index 0000000..42f0242 --- /dev/null +++ b/airbyte-integrations/connectors/destination-keen/src/test-integration/java/io/airbyte/integrations/destination/keen/KeenDestinationTest.java @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.keen; + +import static io.airbyte.integrations.destination.keen.KeenDestination.CONFIG_API_KEY; +import static io.airbyte.integrations.destination.keen.KeenDestination.CONFIG_PROJECT_ID; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ArrayNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.google.api.client.util.Lists; +import io.airbyte.cdk.integrations.standardtest.destination.DestinationAcceptanceTest; +import io.airbyte.cdk.integrations.standardtest.destination.comparator.AdvancedTestDataComparator; +import io.airbyte.cdk.integrations.standardtest.destination.comparator.TestDataComparator; +import io.airbyte.commons.io.IOs; +import io.airbyte.commons.json.Jsons; +import io.airbyte.protocol.models.v0.AirbyteMessage; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Comparator; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; + +public class KeenDestinationTest extends DestinationAcceptanceTest { + + private static final String SECRET_FILE_PATH = "secrets/config.json"; + + private final KeenHttpClient keenHttpClient = new KeenHttpClient(); + private final Set collectionsToDelete = new HashSet<>(); + + private String projectId; + private String apiKey; + private JsonNode configJson; + + @Override + protected String getImageName() { + return "airbyte/destination-keen:dev"; + } + + @Override + protected JsonNode getConfig() throws Exception { + return configJson; + } + + @Override + protected TestDataComparator getTestDataComparator() { + return new AdvancedTestDataComparator(); + } + + @Override + protected boolean supportBasicDataTypeTest() { + return true; + } + + @Override + protected boolean supportArrayDataTypeTest() { + return true; + } + + @Override + protected boolean supportObjectDataTypeTest() { + return true; + } + + @Override + protected JsonNode getFailCheckConfig() throws Exception { + ((ObjectNode) configJson).put(CONFIG_PROJECT_ID, "fake"); + ((ObjectNode) configJson).put(CONFIG_API_KEY, "fake"); + + return configJson; + } + + protected JsonNode getBaseConfigJson() { + return Jsons.deserialize(IOs.readFile(Path.of(SECRET_FILE_PATH))); + } + + @Override + protected List retrieveRecords(final TestDestinationEnv testEnv, + final String streamName, + final String namespace, + final JsonNode streamSchema) + throws Exception { + final String accentStrippedStreamName = KeenCharactersStripper.stripSpecialCharactersFromStreamName(streamName); + collectionsToDelete.add(accentStrippedStreamName); + + final ArrayNode array = keenHttpClient.extract(accentStrippedStreamName, projectId, apiKey); + return Lists.newArrayList(array.elements()).stream() + .sorted(Comparator.comparing(o -> o.get("keen").get("timestamp").textValue())) + .map(node -> (JsonNode) ((ObjectNode) node).without("keen")) + .collect(Collectors.toList()); + } + + @Override + protected void setup(final TestDestinationEnv testEnv, 
final HashSet TEST_SCHEMAS) throws Exception { + if (!Files.exists(Path.of(SECRET_FILE_PATH))) { + throw new IllegalStateException( + "Must provide path to a file containing Keen account credentials: Project ID and Master API Key. " + + "By default {module-root}/" + SECRET_FILE_PATH); + } + configJson = getBaseConfigJson(); + projectId = configJson.get(CONFIG_PROJECT_ID).asText(); + apiKey = configJson.get(CONFIG_API_KEY).asText(); + + } + + @Override + protected void tearDown(final TestDestinationEnv testEnv) throws Exception { + for (final String keenCollection : collectionsToDelete) { + keenHttpClient.eraseStream(keenCollection, projectId, apiKey); + } + collectionsToDelete.clear(); + } + + @Override + protected void runSyncAndVerifyStateOutput(final JsonNode config, + final List messages, + final ConfiguredAirbyteCatalog catalog, + final boolean runNormalization) + throws Exception { + super.runSyncAndVerifyStateOutput(config, messages, catalog, runNormalization); + Thread.sleep(10000); + } + +} diff --git a/airbyte-integrations/connectors/destination-keen/src/test/java/io/airbyte/integrations/destination/keen/KeenRecordConsumerTest.java b/airbyte-integrations/connectors/destination-keen/src/test/java/io/airbyte/integrations/destination/keen/KeenRecordConsumerTest.java new file mode 100644 index 0000000..a6dd785 --- /dev/null +++ b/airbyte-integrations/connectors/destination-keen/src/test/java/io/airbyte/integrations/destination/keen/KeenRecordConsumerTest.java @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.keen; + +import static io.airbyte.integrations.destination.keen.KeenDestination.CONFIG_API_KEY; +import static io.airbyte.integrations.destination.keen.KeenDestination.CONFIG_PROJECT_ID; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import io.airbyte.cdk.integrations.base.FailureTrackingAirbyteMessageConsumer; +import io.airbyte.cdk.integrations.standardtest.destination.PerStreamStateMessageTest; +import io.airbyte.commons.json.Jsons; +import io.airbyte.protocol.models.Field; +import io.airbyte.protocol.models.JsonSchemaType; +import io.airbyte.protocol.models.v0.AirbyteMessage; +import io.airbyte.protocol.models.v0.CatalogHelpers; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import java.util.List; +import java.util.function.Consumer; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; + +@DisplayName("KafkaRecordConsumer") +@ExtendWith(MockitoExtension.class) +public class KeenRecordConsumerTest extends PerStreamStateMessageTest { + + private static final String SCHEMA_NAME = "public"; + private static final String STREAM_NAME = "id_and_name"; + + private static final ConfiguredAirbyteCatalog CATALOG = new ConfiguredAirbyteCatalog().withStreams(List.of( + CatalogHelpers.createConfiguredAirbyteStream( + STREAM_NAME, + SCHEMA_NAME, + Field.of("id", JsonSchemaType.NUMBER), + Field.of("name", JsonSchemaType.STRING)))); + @Mock + private Consumer outputRecordCollector; + + private KeenRecordsConsumer consumer; + + @BeforeEach + public void init() { + final JsonNode config = Jsons.jsonNode(ImmutableMap.builder() + .put(CONFIG_PROJECT_ID, "test_project") + .put(CONFIG_API_KEY, "test_apikey") + .build()); + consumer = new KeenRecordsConsumer(config, CATALOG, 
outputRecordCollector); + } + + @Override + protected Consumer getMockedConsumer() { + return outputRecordCollector; + } + + @Override + protected FailureTrackingAirbyteMessageConsumer getMessageConsumer() { + return consumer; + } + +} diff --git a/airbyte-integrations/connectors/destination-keen/src/test/java/io/airbyte/integrations/destination/keen/KeenTimestampServiceTest.java b/airbyte-integrations/connectors/destination-keen/src/test/java/io/airbyte/integrations/destination/keen/KeenTimestampServiceTest.java new file mode 100644 index 0000000..421f0fe --- /dev/null +++ b/airbyte-integrations/connectors/destination-keen/src/test/java/io/airbyte/integrations/destination/keen/KeenTimestampServiceTest.java @@ -0,0 +1,200 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.keen; + +import static java.util.Map.entry; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.ImmutableMap; +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.resources.MoreResources; +import io.airbyte.protocol.models.v0.AirbyteCatalog; +import io.airbyte.protocol.models.v0.AirbyteMessage; +import io.airbyte.protocol.models.v0.AirbyteRecordMessage; +import io.airbyte.protocol.models.v0.AirbyteStream; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream; +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class KeenTimestampServiceTest { + + private final ObjectMapper objectMapper = new ObjectMapper(); + + @Test + void shouldInitializeCursorFieldsFromCatalog() throws IOException { + final ConfiguredAirbyteCatalog configuredCatalog = readConfiguredCatalogFromFile("cursors_catalog.json"); + + final Map> expectedCursorFieldsMap = Map.ofEntries( + entry("StringTypeStream1", List.of("property1")), + entry("StringTypeStream2", List.of("property1")), + entry("StringTypeStream3", List.of("property1")), + entry("NumberTypeStream1", List.of("property1")), + entry("NumberTypeStream2", List.of("property1")), + entry("ArrayTypeStream1", List.of("property1")), + entry("ArrayTypeStream2", List.of("property1")), + entry("ArrayTypeStream3", List.of("property1")), + entry("NestedCursorStream", List.of("property1", "inside"))); + + final KeenTimestampService keenTimestampService = new KeenTimestampService(configuredCatalog, true); + + final Map> cursorFieldMap = keenTimestampService.getStreamCursorFields(); + Assertions.assertEquals(expectedCursorFieldsMap, cursorFieldMap); + } + + @Test + void shouldInjectTimestampWhenCursorIsValidString() throws IOException { + final ConfiguredAirbyteCatalog configuredCatalog = readConfiguredCatalogFromFile("string_cursor_catalog.json"); + + final KeenTimestampService keenTimestampService = new KeenTimestampService(configuredCatalog, true); + + final AirbyteMessage message = buildMessageWithCursorValue(configuredCatalog, "1999/12/15 14:44 utc"); + final JsonNode expectedJson = buildExpectedJsonWithTimestamp("\"1999/12/15 14:44 utc\"", "1999-12-15T14:44:00Z"); + final JsonNode jsonNode = keenTimestampService.injectTimestamp(message.getRecord()); + + Assertions.assertEquals(jsonNode, expectedJson); + } + + @Test + void 
shouldInjectNumberTimestampWhenTimestampIsSeconds() throws IOException { + final ConfiguredAirbyteCatalog configuredCatalog = readConfiguredCatalogFromFile("number_cursor_catalog.json"); + final KeenTimestampService keenTimestampService = new KeenTimestampService(configuredCatalog, true); + + final int secondsCursor = 1628080068; + final AirbyteMessage message = buildMessageWithCursorValue(configuredCatalog, secondsCursor); + final JsonNode expectedJson = buildExpectedJsonWithTimestamp(secondsCursor, "2021-08-04T12:27:48Z"); + final JsonNode jsonNode = keenTimestampService.injectTimestamp(message.getRecord()); + + Assertions.assertEquals(jsonNode, expectedJson); + } + + @Test + void shouldInjectNumberTimestampWhenTimestampIsMillis() throws IOException { + final ConfiguredAirbyteCatalog configuredCatalog = readConfiguredCatalogFromFile("number_cursor_catalog.json"); + final KeenTimestampService keenTimestampService = new KeenTimestampService(configuredCatalog, true); + + final long millisCursor = 1628081113151L; + final AirbyteMessage message = buildMessageWithCursorValue(configuredCatalog, millisCursor); + final JsonNode expectedJson = buildExpectedJsonWithTimestamp(millisCursor, "2021-08-04T12:45:13.151Z"); + final JsonNode jsonNode = keenTimestampService.injectTimestamp(message.getRecord()); + + Assertions.assertEquals(jsonNode, expectedJson); + } + + @Test + void shouldInjectEmittedAtWhenCursorNumberValueIsTooLow() throws IOException { + final ConfiguredAirbyteCatalog configuredCatalog = readConfiguredCatalogFromFile("number_cursor_catalog.json"); + final KeenTimestampService keenTimestampService = new KeenTimestampService(configuredCatalog, true); + + final int notUnixTimestampCursor = 250_000; + final AirbyteMessage message = buildMessageWithCursorValue(configuredCatalog, notUnixTimestampCursor); + + // 2020-10-14T01:09:49.200Z is hardcoded emitted at + final JsonNode expectedJson = buildExpectedJsonWithTimestamp(notUnixTimestampCursor, "2020-10-14T01:09:49.200Z"); + + final JsonNode jsonNode = keenTimestampService.injectTimestamp(message.getRecord()); + + Assertions.assertEquals(jsonNode, expectedJson); + } + + @Test + void shouldInjectEmittedAtWhenCursorIsUnparsableAndRemoveFieldFromMap() throws IOException { + final ConfiguredAirbyteCatalog configuredCatalog = readConfiguredCatalogFromFile("string_cursor_catalog.json"); + + final KeenTimestampService keenTimestampService = new KeenTimestampService(configuredCatalog, true); + + final Map> cursorFieldMap = keenTimestampService.getStreamCursorFields(); + Assertions.assertEquals(cursorFieldMap.size(), 1); + + final AirbyteMessage message = buildMessageWithCursorValue(configuredCatalog, "some_text"); + + // 2020-10-14T01:09:49.200Z is hardcoded emitted at + final JsonNode expectedJson = buildExpectedJsonWithTimestamp("\"some_text\"", "2020-10-14T01:09:49.200Z"); + + final JsonNode jsonNode = keenTimestampService.injectTimestamp(message.getRecord()); + + Assertions.assertEquals(jsonNode, expectedJson); + Assertions.assertEquals(cursorFieldMap.size(), 0); + } + + @Test + void shouldInjectEmittedAtWhenCursorIsValidAndInferenceIsDisabled() throws IOException { + final ConfiguredAirbyteCatalog configuredCatalog = readConfiguredCatalogFromFile("number_cursor_catalog.json"); + final KeenTimestampService keenTimestampService = new KeenTimestampService(configuredCatalog, false); + + final int secondsCursor = 1628080068; + final AirbyteMessage message = buildMessageWithCursorValue(configuredCatalog, secondsCursor); + + // 2020-10-14T01:09:49.200Z 
is hardcoded emitted at + final JsonNode expectedJson = buildExpectedJsonWithTimestamp(secondsCursor, "2020-10-14T01:09:49.200Z"); + final JsonNode jsonNode = keenTimestampService.injectTimestamp(message.getRecord()); + + Assertions.assertEquals(jsonNode, expectedJson); + } + + @Test + void shouldInjectTimestampWhenCursorIsNestedField() throws IOException { + final ConfiguredAirbyteCatalog configuredCatalog = readConfiguredCatalogFromFile("nested_cursor_catalog.json"); + final KeenTimestampService keenTimestampService = new KeenTimestampService(configuredCatalog, true); + + final int secondsCursor = 1628080068; + final AirbyteMessage message = buildMessageWithCursorValue(configuredCatalog, + ImmutableMap.builder().put("nestedProperty", secondsCursor).build()); + + final String nestedJson = String.format("{\"nestedProperty\": %s}", secondsCursor); + + final JsonNode expectedJson = buildExpectedJsonWithTimestamp(nestedJson, "2021-08-04T12:27:48Z"); + final JsonNode jsonNode = keenTimestampService.injectTimestamp(message.getRecord()); + + Assertions.assertEquals(jsonNode, expectedJson); + } + + private AirbyteMessage buildMessageWithCursorValue(final ConfiguredAirbyteCatalog configuredCatalog, final T cursorValue) { + return new AirbyteMessage() + .withType(AirbyteMessage.Type.RECORD) + .withRecord(new AirbyteRecordMessage() + .withStream(configuredCatalog.getStreams().get(0).getStream().getName()) + .withEmittedAt(1602637789200L) + .withData(Jsons.jsonNode(ImmutableMap.builder() + .put("cursorProperty", cursorValue) + .put("otherProperty", "something") + .build()))); + } + + private JsonNode buildExpectedJsonWithTimestamp(final T value, final String parsedTimestamp) + throws JsonProcessingException { + return objectMapper.readTree( + String.format( + "{" + + "\"cursorProperty\": %s," + + "\"otherProperty\": \"something\"," + + "\"keen\" : { \"timestamp\": \"%s\"}" + + "}", + value, parsedTimestamp)); + } + + private ConfiguredAirbyteCatalog readConfiguredCatalogFromFile(final String fileName) + throws IOException { + final AirbyteCatalog catalog = Jsons.deserialize(MoreResources.readResource(fileName), AirbyteCatalog.class); + return new ConfiguredAirbyteCatalog() + .withStreams(catalog.getStreams() + .stream() + .map(this::toConfiguredStreamWithCursors) + .collect(Collectors.toList())); + } + + public ConfiguredAirbyteStream toConfiguredStreamWithCursors(final AirbyteStream stream) { + return new ConfiguredAirbyteStream() + .withStream(stream) + .withCursorField(stream.getDefaultCursorField()); + } + +} diff --git a/airbyte-integrations/connectors/destination-keen/src/test/resources/cursors_catalog.json b/airbyte-integrations/connectors/destination-keen/src/test/resources/cursors_catalog.json new file mode 100644 index 0000000..df442b5 --- /dev/null +++ b/airbyte-integrations/connectors/destination-keen/src/test/resources/cursors_catalog.json @@ -0,0 +1,124 @@ +{ + "streams": [ + { + "name": "StreamWithoutCursors", + "json_schema": { + "properties": { + "property1": { + "type": "string" + } + } + } + }, + { + "name": "StringTypeStream1", + "source_defined_cursor": true, + "default_cursor_field": ["property1"], + "json_schema": { + "properties": { + "property1": { + "type": "string" + } + } + } + }, + { + "name": "StringTypeStream2", + "source_defined_cursor": true, + "default_cursor_field": ["property1"], + "json_schema": { + "properties": { + "property1": { + "type": "varchar" + } + } + } + }, + { + "name": "StringTypeStream3", + "source_defined_cursor": true, + "default_cursor_field": 
["property1"], + "json_schema": { + "properties": { + "property1": { + "type": "time" + } + } + } + }, + { + "name": "NumberTypeStream1", + "source_defined_cursor": true, + "default_cursor_field": ["property1"], + "json_schema": { + "properties": { + "property1": { + "type": "number" + } + } + } + }, + { + "name": "NumberTypeStream2", + "source_defined_cursor": true, + "default_cursor_field": ["property1"], + "json_schema": { + "properties": { + "property1": { + "type": "integer" + } + } + } + }, + { + "name": "ArrayTypeStream1", + "source_defined_cursor": true, + "default_cursor_field": ["property1"], + "json_schema": { + "properties": { + "property1": { + "type": ["null", "integer"] + } + } + } + }, + { + "name": "ArrayTypeStream2", + "source_defined_cursor": true, + "default_cursor_field": ["property1"], + "json_schema": { + "properties": { + "property1": { + "type": [null, "integer"] + } + } + } + }, + { + "name": "ArrayTypeStream3", + "source_defined_cursor": true, + "default_cursor_field": ["property1"], + "json_schema": { + "properties": { + "property1": { + "type": ["anything", "integer", "anything"] + } + } + } + }, + { + "name": "NestedCursorStream", + "source_defined_cursor": true, + "default_cursor_field": ["property1", "inside"], + "json_schema": { + "properties": { + "property1": { + "inside": { + "type": "number" + } + } + } + } + } + ] +} diff --git a/airbyte-integrations/connectors/destination-keen/src/test/resources/nested_cursor_catalog.json b/airbyte-integrations/connectors/destination-keen/src/test/resources/nested_cursor_catalog.json new file mode 100644 index 0000000..0c81f13 --- /dev/null +++ b/airbyte-integrations/connectors/destination-keen/src/test/resources/nested_cursor_catalog.json @@ -0,0 +1,21 @@ +{ + "streams": [ + { + "name": "NumberTypeStream", + "source_defined_cursor": true, + "default_cursor_field": ["cursorProperty", "nestedProperty"], + "json_schema": { + "properties": { + "cursorProperty": { + "nestedProperty": { + "type": "number" + } + }, + "otherProperty": { + "type": "string" + } + } + } + } + ] +} diff --git a/airbyte-integrations/connectors/destination-keen/src/test/resources/number_cursor_catalog.json b/airbyte-integrations/connectors/destination-keen/src/test/resources/number_cursor_catalog.json new file mode 100644 index 0000000..3b7deae --- /dev/null +++ b/airbyte-integrations/connectors/destination-keen/src/test/resources/number_cursor_catalog.json @@ -0,0 +1,19 @@ +{ + "streams": [ + { + "name": "NumberTypeStream", + "source_defined_cursor": true, + "default_cursor_field": ["cursorProperty"], + "json_schema": { + "properties": { + "cursorProperty": { + "type": "number" + }, + "otherProperty": { + "type": "string" + } + } + } + } + ] +} diff --git a/airbyte-integrations/connectors/destination-keen/src/test/resources/string_cursor_catalog.json b/airbyte-integrations/connectors/destination-keen/src/test/resources/string_cursor_catalog.json new file mode 100644 index 0000000..834e1e8 --- /dev/null +++ b/airbyte-integrations/connectors/destination-keen/src/test/resources/string_cursor_catalog.json @@ -0,0 +1,19 @@ +{ + "streams": [ + { + "name": "StringTypeStream", + "source_defined_cursor": true, + "default_cursor_field": ["cursorProperty"], + "json_schema": { + "properties": { + "cursorProperty": { + "type": "string" + }, + "otherProperty": { + "type": "string" + } + } + } + } + ] +} diff --git a/airbyte-integrations/connectors/destination-kinesis/README.md b/airbyte-integrations/connectors/destination-kinesis/README.md new file mode 100644 
index 0000000..d0647df --- /dev/null +++ b/airbyte-integrations/connectors/destination-kinesis/README.md @@ -0,0 +1,72 @@ +# Destination Kinesis + +This is the repository for the Kinesis destination connector in Java. +For information about how to use this connector within Airbyte, see [the User Documentation](https://docs.airbyte.io/integrations/destinations/kinesis). + +## Local development + +#### Building via Gradle +From the Airbyte repository root, run: +``` +./gradlew :airbyte-integrations:connectors:destination-kinesis:build +``` + +#### Create credentials +**If you are a community contributor**, generate the necessary credentials and place them in `secrets/config.json` conforming to the spec file in `src/main/resources/spec.json`. +Note that the `secrets` directory is git-ignored by default, so there is no danger of accidentally checking in sensitive information. + +**If you are an Airbyte core member**, follow the [instructions](https://docs.airbyte.io/connector-development#using-credentials-in-ci) to set up the credentials. + +### Locally running the connector docker image + +#### Build +Build the connector image via Gradle: + +``` +./gradlew :airbyte-integrations:connectors:destination-kinesis:buildConnectorImage +``` +Once built, the docker image name and tag on your host will be `airbyte/destination-kinesis:dev`. + +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/destination-kinesis:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-kinesis:dev check --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-kinesis:dev discover --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/destination-kinesis:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json +``` + +## Testing +We use `JUnit` for Java tests. + +### Unit and Integration Tests +Place unit tests under `src/test/java/io/airbyte/integrations/destination/kinesis`. + +#### Acceptance Tests +Airbyte has a standard test suite that all destination connectors must pass. Implement the `TODO`s in +`src/test-integration/java/io/airbyte/integrations/destination/kinesis/KinesisDestinationAcceptanceTest.java`. + +### Using gradle to run tests +All commands should be run from the Airbyte project root. +To run unit tests: +``` +./gradlew :airbyte-integrations:connectors:destination-kinesis:unitTest +``` +To run acceptance and custom integration tests: +``` +./gradlew :airbyte-integrations:connectors:destination-kinesis:integrationTest +``` + +## Dependency Management + +### Publishing a new version of the connector +You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what? +1. Make sure your changes are passing our test suite: `airbyte-ci connectors --name=destination-kinesis test` +2. Bump the connector version in `metadata.yaml`: increment the `dockerImageTag` value. Please follow [semantic versioning for connectors](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#semantic-versioning-for-connectors). +3. Make sure the `metadata.yaml` content is up to date. +4. Make sure the connector documentation and its changelog are up to date (`docs/integrations/destinations/kinesis.md`). +5. 
Create a Pull Request: use [our PR naming conventions](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#pull-request-title-convention). +6. Pat yourself on the back for being an awesome contributor. +7. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master. + diff --git a/airbyte-integrations/connectors/destination-kinesis/bootstrap.md b/airbyte-integrations/connectors/destination-kinesis/bootstrap.md new file mode 100644 index 0000000..7ad2fc6 --- /dev/null +++ b/airbyte-integrations/connectors/destination-kinesis/bootstrap.md @@ -0,0 +1,22 @@ +# Kinesis Destination + +Amazon Kinesis makes it easy to collect, process, and analyze real-time, streaming data so you can get timely insights and react quickly to new information. Amazon Kinesis offers key capabilities to cost-effectively process streaming data at any scale, along with the flexibility to choose the tools that best suit the requirements of your application. +You can use Kinesis Data Streams for rapid and continuous data intake and aggregation. The type of data used can include IT infrastructure log data, application logs, social media, market data feeds, and web clickstream data. Because the response time for the data intake and processing is in real time, the processing is typically lightweight. +[Read more about Amazon Kinesis](https://aws.amazon.com/kinesis/) + +This connector maps an incoming Airbyte namespace and stream to a different Kinesis stream created and configured with the provided shard count. The connector +supports the `append` sync mode, which enables records to be streamed directly to an existing Kinesis stream. + +The implementation uses the [Kinesis](https://docs.aws.amazon.com/sdk-for-java/latest/developer-guide/examples-kinesis.html) module of the AWS SDK for Java v2 to access the Kinesis service. +[KinesisStream](./src/main/java/io/airbyte/integrations/destination/kinesis/KinesisStream.java) is the main entrypoint for communicating with Kinesis and providing the needed functionality. Internally it uses a KinesisClient retrieved from the +[KinesisClientPool](./src/main/java/io/airbyte/integrations/destination/kinesis/KinesisClientPool.java). Records retrieved from the Kinesis stream are mapped to +[KinesisRecord](./src/main/java/io/airbyte/integrations/destination/kinesis/KinesisRecord.java). Buffering of records is also supported, which should increase performance and throughput by sending multiple records in a single HTTP request. + +The [KinesisMessageConsumer](./src/main/java/io/airbyte/integrations/destination/kinesis/KinesisMessageConsumer.java) +class contains the logic for handling Airbyte messages, creating the needed Kinesis streams and streaming the received data. + +## Development + +See the [KinesisStream](./src/main/java/io/airbyte/integrations/destination/kinesis/KinesisStream.java) class for how to use the Kinesis client to access the Kinesis service, as sketched below. 
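A minimal sketch of how the `KinesisStream` API might be exercised end to end against the localstack container from `docker-compose.yml`; the endpoint, credentials, stream name, and the `KinesisStreamExample` class itself are illustrative assumptions rather than connector code:

```java
// Sketch only: assumes the localstack container from docker-compose.yml is running on port 4566.
package io.airbyte.integrations.destination.kinesis;

import com.fasterxml.jackson.databind.JsonNode;
import com.google.common.collect.ImmutableMap;
import io.airbyte.commons.json.Jsons;

public class KinesisStreamExample {

  public static void main(String[] args) {
    // Build a config the same way the integration tests do (see KinesisDataFactory);
    // the endpoint and credentials below are dummy localstack values.
    final JsonNode config = Jsons.jsonNode(ImmutableMap.builder()
        .put("endpoint", "http://localhost:4566")
        .put("region", "us-east-1")
        .put("shardCount", 1)
        .put("accessKey", "test")
        .put("privateKey", "test")
        .put("bufferSize", 100)
        .build());

    final KinesisStream kinesisStream = new KinesisStream(new KinesisConfig(config));
    try {
      // Create the destination stream (blocks until it is ACTIVE) and buffer one record.
      kinesisStream.createStream("example_stream");
      kinesisStream.putRecord(
          "example_stream",
          KinesisUtils.buildPartitionKey(), // random UUID partition key
          "{\"greeting\":\"hello\"}",
          e -> {});                         // error handler invoked on flush failures
      // Push any buffered records to Kinesis, then read them back as KinesisRecord objects.
      kinesisStream.flush(e -> {});
      kinesisStream.getRecords("example_stream")
          .forEach(record -> System.out.println(record.getData()));
      kinesisStream.deleteStream("example_stream");
    } finally {
      // Return the pooled client; it is closed once no other stream references it.
      kinesisStream.close();
    }
  }

}
```

In the connector itself this same sequence is driven by `KinesisMessageConsumer`, which creates the streams in `startTracked()`, buffers records in `acceptTracked()`, and flushes and releases the client in `close()`.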
+ +If you want to learn more, read the [Aws docs](https://docs.aws.amazon.com/sdk-for-java/latest/developer-guide/examples-kinesis.html) \ No newline at end of file diff --git a/airbyte-integrations/connectors/destination-kinesis/build.gradle b/airbyte-integrations/connectors/destination-kinesis/build.gradle new file mode 100644 index 0000000..3abe284 --- /dev/null +++ b/airbyte-integrations/connectors/destination-kinesis/build.gradle @@ -0,0 +1,37 @@ +plugins { + id 'application' + id 'airbyte-java-connector' +} + +airbyteJavaConnector { + cdkVersionRequired = '0.2.0' + features = ['db-destinations'] + useLocalCdk = false +} + +//remove once upgrading the CDK version to 0.4.x or later +java { + compileJava { + options.compilerArgs.remove("-Werror") + } +} + +airbyteJavaConnector.addCdkDependencies() + +application { + mainClass = 'io.airbyte.integrations.destination.kinesis.KinesisDestination' + applicationDefaultJvmArgs = ['-XX:+ExitOnOutOfMemoryError', '-XX:MaxRAMPercentage=75.0'] +} + +def kinesisVersion = '2.17.75' +def testContainersVersion = '1.16.2' +def assertVersion = '3.21.0' + +dependencies { + + // https://mvnrepository.com/artifact/software.amazon.awssdk/kinesis + implementation "software.amazon.awssdk:kinesis:${kinesisVersion}" + + testImplementation "org.assertj:assertj-core:${assertVersion}" + testImplementation "org.testcontainers:localstack:${testContainersVersion}" +} diff --git a/airbyte-integrations/connectors/destination-kinesis/docker-compose.yml b/airbyte-integrations/connectors/destination-kinesis/docker-compose.yml new file mode 100644 index 0000000..64bafc3 --- /dev/null +++ b/airbyte-integrations/connectors/destination-kinesis/docker-compose.yml @@ -0,0 +1,14 @@ +version: "3.7" + +services: + kinesis: + image: localstack/localstack:0.12.20 + ports: + - "4566:4566" + environment: + - "SERVICES=kinesis" + - "HOSTNAME=localhost" + - "KINESIS_LATENCY=200" + - "KINESIS_SHARD_LIMIT=500" +# - "AWS_ACCESS_KEY_ID=" +# - "AWS_SECRET_ACCESS_KEY=" diff --git a/airbyte-integrations/connectors/destination-kinesis/icon.svg b/airbyte-integrations/connectors/destination-kinesis/icon.svg new file mode 100644 index 0000000..a435c7a --- /dev/null +++ b/airbyte-integrations/connectors/destination-kinesis/icon.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/airbyte-integrations/connectors/destination-kinesis/metadata.yaml b/airbyte-integrations/connectors/destination-kinesis/metadata.yaml new file mode 100644 index 0000000..2487a91 --- /dev/null +++ b/airbyte-integrations/connectors/destination-kinesis/metadata.yaml @@ -0,0 +1,24 @@ +data: + connectorSubtype: api + connectorType: destination + definitionId: 6d1d66d4-26ab-4602-8d32-f85894b04955 + dockerImageTag: 0.1.5 + dockerRepository: airbyte/destination-kinesis + githubIssueLabel: destination-kinesis + icon: kinesis.svg + license: MIT + name: Kinesis + registries: + cloud: + enabled: false + oss: + enabled: false + releaseStage: alpha + documentationUrl: https://docs.airbyte.com/integrations/destinations/kinesis + tags: + - language:java + ab_internal: + sl: 100 + ql: 100 + supportLevel: archived +metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/destination-kinesis/src/main/java/io/airbyte/integrations/destination/kinesis/KinesisClientPool.java b/airbyte-integrations/connectors/destination-kinesis/src/main/java/io/airbyte/integrations/destination/kinesis/KinesisClientPool.java new file mode 100644 index 0000000..db424ba --- /dev/null +++ 
b/airbyte-integrations/connectors/destination-kinesis/src/main/java/io/airbyte/integrations/destination/kinesis/KinesisClientPool.java @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.kinesis; + +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicInteger; +import software.amazon.awssdk.services.kinesis.KinesisClient; + +/** + * KinesisClientPool class for managing a pool of kinesis clients with different configurations. + */ +public class KinesisClientPool { + + private static final ConcurrentHashMap> clients; + + static { + clients = new ConcurrentHashMap<>(); + } + + private KinesisClientPool() { + + } + + /** + * Initializes a Kinesis client for accessing Kinesis. If there is already an existing client with + * the provided configuration it will return the existing one and increase the usage count, if not + * it will return a new one. + * + * @param kinesisConfig used to configure the Kinesis client. + * @return KinesisClient which can be used to access Kinesis. + */ + public static KinesisClient initClient(KinesisConfig kinesisConfig) { + var cachedClient = clients.get(kinesisConfig); + if (cachedClient != null) { + cachedClient.value2().incrementAndGet(); + return cachedClient.value1(); + } else { + var client = KinesisUtils.buildKinesisClient(kinesisConfig); + clients.put(kinesisConfig, Tuple.of(client, new AtomicInteger(1))); + return client; + } + } + + /** + * Returns a Kinesis client to the pool. If the client is no longer used by any other external + * instances it will be closed and removed from the map, if not only its usage count will be + * decreased. + * + * @param kinesisConfig that was used to configure the Kinesis client. + */ + public static void closeClient(KinesisConfig kinesisConfig) { + var cachedClient = clients.get(kinesisConfig); + if (cachedClient == null) { + throw new IllegalStateException("No session for the provided config"); + } + int count = cachedClient.value2().decrementAndGet(); + if (count < 1) { + cachedClient.value1().close(); + clients.remove(kinesisConfig); + } + } + +} diff --git a/airbyte-integrations/connectors/destination-kinesis/src/main/java/io/airbyte/integrations/destination/kinesis/KinesisConfig.java b/airbyte-integrations/connectors/destination-kinesis/src/main/java/io/airbyte/integrations/destination/kinesis/KinesisConfig.java new file mode 100644 index 0000000..674fb33 --- /dev/null +++ b/airbyte-integrations/connectors/destination-kinesis/src/main/java/io/airbyte/integrations/destination/kinesis/KinesisConfig.java @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.kinesis; + +import com.fasterxml.jackson.databind.JsonNode; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.Objects; + +/* + * KinesisConfig class for storing immutable configuration for Kinesis. 
+ */ +public class KinesisConfig { + + private final URI endpoint; + + private final String region; + + private final int shardCount; + + private final String accessKey; + + private final String privateKey; + + private final int bufferSize; + + public KinesisConfig(URI endpoint, + String region, + int shardCount, + String accessKey, + String privateKey, + int bufferSize) { + this.endpoint = endpoint; + this.region = region; + this.shardCount = shardCount; + this.accessKey = accessKey; + this.privateKey = privateKey; + this.bufferSize = bufferSize; + } + + public KinesisConfig(JsonNode jsonNode) { + String strend = jsonNode.get("endpoint").asText(); + try { + this.endpoint = strend != null && !strend.isBlank() ? new URI(strend) : null; + } catch (URISyntaxException e) { + throw new UncheckedURISyntaxException(e); + } + this.region = jsonNode.get("region").asText(); + this.shardCount = jsonNode.get("shardCount").asInt(5); + this.accessKey = jsonNode.get("accessKey").asText(); + this.privateKey = jsonNode.get("privateKey").asText(); + this.bufferSize = jsonNode.get("bufferSize").asInt(100); + } + + public URI getEndpoint() { + return endpoint; + } + + public String getRegion() { + return region; + } + + public int getShardCount() { + return shardCount; + } + + public String getAccessKey() { + return accessKey; + } + + public String getPrivateKey() { + return privateKey; + } + + public int getBufferSize() { + return bufferSize; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + KinesisConfig that = (KinesisConfig) o; + return Objects.equals(endpoint, that.endpoint) && Objects.equals(region, that.region) && + accessKey.equals(that.accessKey) && privateKey.equals(that.privateKey); + } + + @Override + public int hashCode() { + return Objects.hash(endpoint, region, accessKey, privateKey); + } + + static class UncheckedURISyntaxException extends RuntimeException { + + public UncheckedURISyntaxException(Throwable cause) { + super(cause); + } + + } + +} diff --git a/airbyte-integrations/connectors/destination-kinesis/src/main/java/io/airbyte/integrations/destination/kinesis/KinesisDestination.java b/airbyte-integrations/connectors/destination-kinesis/src/main/java/io/airbyte/integrations/destination/kinesis/KinesisDestination.java new file mode 100644 index 0000000..1c4cdd1 --- /dev/null +++ b/airbyte-integrations/connectors/destination-kinesis/src/main/java/io/airbyte/integrations/destination/kinesis/KinesisDestination.java @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.kinesis; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.cdk.integrations.BaseConnector; +import io.airbyte.cdk.integrations.base.AirbyteMessageConsumer; +import io.airbyte.cdk.integrations.base.Destination; +import io.airbyte.cdk.integrations.base.IntegrationRunner; +import io.airbyte.protocol.models.v0.AirbyteConnectionStatus; +import io.airbyte.protocol.models.v0.AirbyteMessage; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import java.util.function.Consumer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * KinesisDestination class for configuring Kinesis as an Airbyte destination. 
+ */ +public class KinesisDestination extends BaseConnector implements Destination { + + private static final Logger LOGGER = LoggerFactory.getLogger(KinesisDestination.class); + + public static void main(String[] args) throws Exception { + new IntegrationRunner(new KinesisDestination()).run(args); + } + + /** + * Check Kinesis connection status with the provided Json configuration. + * + * @param config json configuration for connecting to Kinesis + * @return AirbyteConnectionStatus status of the connection result. + */ + @Override + public AirbyteConnectionStatus check(final JsonNode config) { + KinesisStream kinesisStream = null; + var streamName = "test_stream"; + try { + var kinesisConfig = new KinesisConfig(config); + kinesisStream = new KinesisStream(kinesisConfig); + kinesisStream.createStream(streamName); + var partitionKey = KinesisUtils.buildPartitionKey(); + kinesisStream.putRecord(streamName, partitionKey, "{}", e -> {}); + return new AirbyteConnectionStatus().withStatus(AirbyteConnectionStatus.Status.SUCCEEDED); + } catch (Exception e) { + LOGGER.error("Error while trying to connect to Kinesis: ", e); + return new AirbyteConnectionStatus().withStatus(AirbyteConnectionStatus.Status.FAILED); + } finally { + if (kinesisStream != null) { + try { + kinesisStream.flush(e -> {}); + kinesisStream.deleteStream(streamName); + } catch (Exception e) { + LOGGER.error("Error while deleting kinesis stream: ", e); + } + kinesisStream.close(); + } + } + } + + /** + * Returns an Airbyte message consumer which can be used to handle the incoming Airbyte messages. + * + * @param config json configuration for connecting to Kinesis + * @param configuredCatalog of the incoming stream. + * @param outputRecordCollector state collector. + * @return KinesisMessageConsumer for consuming Airbyte messages and streaming them to Kinesis. + */ + @Override + public AirbyteMessageConsumer getConsumer(final JsonNode config, + final ConfiguredAirbyteCatalog configuredCatalog, + final Consumer outputRecordCollector) { + final KinesisStream kinesisStream = new KinesisStream(new KinesisConfig(config)); + return new KinesisMessageConsumer(configuredCatalog, kinesisStream, outputRecordCollector); + } + +} diff --git a/airbyte-integrations/connectors/destination-kinesis/src/main/java/io/airbyte/integrations/destination/kinesis/KinesisMessageConsumer.java b/airbyte-integrations/connectors/destination-kinesis/src/main/java/io/airbyte/integrations/destination/kinesis/KinesisMessageConsumer.java new file mode 100644 index 0000000..071db77 --- /dev/null +++ b/airbyte-integrations/connectors/destination-kinesis/src/main/java/io/airbyte/integrations/destination/kinesis/KinesisMessageConsumer.java @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.kinesis; + +import io.airbyte.cdk.integrations.base.FailureTrackingAirbyteMessageConsumer; +import io.airbyte.commons.json.Jsons; +import io.airbyte.protocol.models.v0.AirbyteMessage; +import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import java.time.Instant; +import java.util.Map; +import java.util.function.Consumer; +import java.util.stream.Collectors; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * KinesisMessageConsumer class for handling incoming Airbyte messages. 
+ */ +public class KinesisMessageConsumer extends FailureTrackingAirbyteMessageConsumer { + + private static final Logger LOGGER = LoggerFactory.getLogger(KinesisMessageConsumer.class); + + private final Consumer outputRecordCollector; + + private final KinesisStream kinesisStream; + + private final Map kinesisStreams; + + public KinesisMessageConsumer(final ConfiguredAirbyteCatalog configuredCatalog, + final KinesisStream kinesisStream, + final Consumer outputRecordCollector) { + this.outputRecordCollector = outputRecordCollector; + this.kinesisStream = kinesisStream; + var nameTransformer = new KinesisNameTransformer(); + this.kinesisStreams = configuredCatalog.getStreams().stream() + .collect(Collectors.toUnmodifiableMap( + AirbyteStreamNameNamespacePair::fromConfiguredAirbyteSteam, + k -> new KinesisStreamConfig( + nameTransformer.streamName(k.getStream().getNamespace(), k.getStream().getName()), + k.getDestinationSyncMode()))); + } + + /** + * Start tracking the incoming Airbyte streams by creating the needed Kinesis streams. + */ + @Override + protected void startTracked() { + kinesisStreams.forEach((k, v) -> kinesisStream.createStream(v.getStreamName())); + } + + /** + * Handle an incoming Airbyte message by serializing it to the appropriate Kinesis structure and + * sending it to the stream. + * + * @param message received from the Airbyte source. + */ + @Override + protected void acceptTracked(final AirbyteMessage message) { + if (message.getType() == AirbyteMessage.Type.RECORD) { + var messageRecord = message.getRecord(); + + var streamConfig = + kinesisStreams.get(AirbyteStreamNameNamespacePair.fromRecordMessage(messageRecord)); + + if (streamConfig == null) { + throw new IllegalArgumentException("Unrecognized destination stream"); + } + + var partitionKey = KinesisUtils.buildPartitionKey(); + + var data = Jsons.jsonNode(Map.of( + KinesisRecord.COLUMN_NAME_AB_ID, partitionKey, + KinesisRecord.COLUMN_NAME_DATA, Jsons.serialize(messageRecord.getData()), + KinesisRecord.COLUMN_NAME_EMITTED_AT, Instant.now())); + + var streamName = streamConfig.getStreamName(); + kinesisStream.putRecord(streamName, partitionKey, Jsons.serialize(data), e -> { + LOGGER.error("Error while streaming data to Kinesis", e); + // throw exception and end sync? + }); + } else if (message.getType() == AirbyteMessage.Type.STATE) { + outputRecordCollector.accept(message); + } else { + LOGGER.warn("Unsupported airbyte message type: {}", message.getType()); + } + } + + /** + * Flush the Kinesis stream if there are any remaining messages to be sent and close the client as a + * terminal operation. + * + * @param hasFailed flag for indicating if the operation has failed. + */ + @Override + protected void close(final boolean hasFailed) { + try { + if (!hasFailed) { + kinesisStream.flush(e -> { + LOGGER.error("Error while streaming data to Kinesis", e); + }); + } + } finally { + kinesisStream.close(); + } + } + +} diff --git a/airbyte-integrations/connectors/destination-kinesis/src/main/java/io/airbyte/integrations/destination/kinesis/KinesisNameTransformer.java b/airbyte-integrations/connectors/destination-kinesis/src/main/java/io/airbyte/integrations/destination/kinesis/KinesisNameTransformer.java new file mode 100644 index 0000000..f9d0020 --- /dev/null +++ b/airbyte-integrations/connectors/destination-kinesis/src/main/java/io/airbyte/integrations/destination/kinesis/KinesisNameTransformer.java @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.kinesis; + +import io.airbyte.cdk.integrations.destination.StandardNameTransformer; + +/** + * KinesisNameTransformer class for creating Kinesis stream names. + */ +class KinesisNameTransformer extends StandardNameTransformer { + + /** + * Create Kinesis destination stream name by combining the incoming namespace and stream + * + * @param namespace of the source data + * @param stream of the source data + */ + String streamName(String namespace, String stream) { + namespace = namespace != null ? namespace : ""; + var streamName = namespace + "_" + stream; + streamName = super.convertStreamName(streamName); + // max char length for kinesis stream name is 128 + return streamName.length() > 128 ? streamName.substring(0, 128) : streamName; + } + +} diff --git a/airbyte-integrations/connectors/destination-kinesis/src/main/java/io/airbyte/integrations/destination/kinesis/KinesisRecord.java b/airbyte-integrations/connectors/destination-kinesis/src/main/java/io/airbyte/integrations/destination/kinesis/KinesisRecord.java new file mode 100644 index 0000000..27ee0ec --- /dev/null +++ b/airbyte-integrations/connectors/destination-kinesis/src/main/java/io/airbyte/integrations/destination/kinesis/KinesisRecord.java @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.kinesis; + +import com.fasterxml.jackson.annotation.JsonProperty; +import java.time.Instant; +import java.util.UUID; + +/** + * KinesisRecord class for mapping records in the Kinesis stream. + */ +public class KinesisRecord { + + public static final String COLUMN_NAME_AB_ID = "_airbyte_ab_id"; + public static final String COLUMN_NAME_DATA = "_airbyte_data"; + public static final String COLUMN_NAME_EMITTED_AT = "_airbyte_emitted_at"; + + @JsonProperty(COLUMN_NAME_AB_ID) + private UUID id; + + @JsonProperty(COLUMN_NAME_DATA) + private String data; + + @JsonProperty(COLUMN_NAME_EMITTED_AT) + private Instant timestamp; + + public KinesisRecord() { + + } + + public KinesisRecord(UUID id, String data, Instant timestamp) { + this.id = id; + this.data = data; + this.timestamp = timestamp; + } + + public static KinesisRecord of(UUID id, String data, Instant timestamp) { + return new KinesisRecord(id, data, timestamp); + } + + public UUID getId() { + return id; + } + + public String getData() { + return data; + } + + public Instant getTimestamp() { + return timestamp; + } + + @Override + public String toString() { + return "KinesisRecord{" + + "id=" + id + + ", data='" + data + '\'' + + ", timestamp=" + timestamp + + '}'; + } + +} diff --git a/airbyte-integrations/connectors/destination-kinesis/src/main/java/io/airbyte/integrations/destination/kinesis/KinesisStream.java b/airbyte-integrations/connectors/destination-kinesis/src/main/java/io/airbyte/integrations/destination/kinesis/KinesisStream.java new file mode 100644 index 0000000..6faa753 --- /dev/null +++ b/airbyte-integrations/connectors/destination-kinesis/src/main/java/io/airbyte/integrations/destination/kinesis/KinesisStream.java @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.kinesis; + +import io.airbyte.commons.json.Jsons; +import java.io.Closeable; +import java.util.ArrayList; +import java.util.List; +import java.util.function.Consumer; +import java.util.stream.Collectors; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import software.amazon.awssdk.core.BytesWrapper; +import software.amazon.awssdk.core.SdkBytes; +import software.amazon.awssdk.services.kinesis.KinesisClient; +import software.amazon.awssdk.services.kinesis.model.DescribeStreamResponse; +import software.amazon.awssdk.services.kinesis.model.PutRecordsRequestEntry; +import software.amazon.awssdk.services.kinesis.model.Record; +import software.amazon.awssdk.services.kinesis.model.ResourceInUseException; +import software.amazon.awssdk.services.kinesis.model.ResourceNotFoundException; +import software.amazon.awssdk.services.kinesis.model.Shard; +import software.amazon.awssdk.services.kinesis.model.ShardIteratorType; +import software.amazon.awssdk.services.kinesis.model.StreamStatus; + +/** + * KinesisStream class for performing various operations on a Kinesis stream. + */ +public class KinesisStream implements Closeable { + + private static final Logger LOGGER = LoggerFactory.getLogger(KinesisStream.class); + + private final KinesisClient kinesisClient; + + private final KinesisConfig kinesisConfig; + + private final int bufferSize; + + // k:v tuples of > + private final List>> buffer; + + public KinesisStream(KinesisConfig kinesisConfig) { + this.kinesisConfig = kinesisConfig; + this.kinesisClient = KinesisClientPool.initClient(kinesisConfig); + this.bufferSize = kinesisConfig.getBufferSize(); + this.buffer = new ArrayList<>(bufferSize); + } + + /** + * Creates a stream specified via its name and with the provided shard count. The method will block + * and retry every 2s until it verifies that the stream is active and can be written to. If the + * stream is already created it will only wait until it is active. + * + * @param streamName name of the stream to be created. + */ + public void createStream(String streamName) { + try { + kinesisClient.createStream(b -> b.streamName(streamName).shardCount(kinesisConfig.getShardCount())); + } catch (ResourceInUseException e) { + LOGGER.info("Stream with name {} has already been created", streamName); + } + // block/wait until stream is active + for (;;) { + DescribeStreamResponse describeStream = kinesisClient.describeStream(b -> b.streamName(streamName)); + if (describeStream.streamDescription().streamStatus() == StreamStatus.ACTIVE) { + return; + } + try { + Thread.sleep(2000); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw KinesisUtils.buildKinesisException("Thread interrupted while waiting for stream to be active", e); + } + } + } + + /** + * Delete the stream specified via its name. The method will block and retry every 2s until it + * verifies that the stream is deleted by receiving the appropriate exception. + * + * @param streamName name of the stream to be deleted. 
+ */ + public void deleteStream(String streamName) { + kinesisClient.deleteStream(b -> b.streamName(streamName)); + // block/wait until stream is deleted + for (;;) { + try { + kinesisClient.describeStream(b -> b.streamName(streamName)); + Thread.sleep(2000); + } catch (ResourceNotFoundException e) { + return; + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw KinesisUtils.buildKinesisException("Thread interrupted while waiting for stream to be deleted", e); + } + } + } + + /** + * Deletes all streams in the Kinesis service, waiting/blocking until all of them are deleted. + */ + public void deleteAllStreams() { + kinesisClient.listStreams().streamNames().forEach(this::deleteStream); + } + + /** + * Sends a record to the Kinesis stream specified via its name. To improve performance the records + * are buffered until the buffer limit is reached after which they are flushed to its destination + * stream. + * + * @param streamName name of the stream where the record should be sent + * @param partitionKey to determine the destination shard + * @param data actual data to be streamed + * @param exceptionConsumer for handling errors related to flushing data per stream + */ + public void putRecord(String streamName, String partitionKey, String data, Consumer exceptionConsumer) { + buffer.add(Tuple.of(streamName, Tuple.of(partitionKey, data))); + if (buffer.size() == bufferSize) { + flush(exceptionConsumer); + } + } + + /** + * Iterates over all the shards for a given streams and retrieves the records which are combined and + * deserialized to a {@link io.airbyte.integrations.destination.kinesis.KinesisRecord} objects. + * + * @param streamName from where to retrieve the records. + * @return List of KinesisRecord objects retrieved from the stream. + */ + public List getRecords(String streamName) { + DescribeStreamResponse describeStream; + List shards = new ArrayList<>(); + do { + + describeStream = kinesisClient.describeStream(b -> b.streamName(streamName)); + + shards.addAll(describeStream.streamDescription().shards()); + + } while (describeStream.streamDescription().hasMoreShards()); + + // iterate over stream shards and retrieve records + return shards.stream() + .map(Shard::shardId) + .map(sh -> kinesisClient.getShardIterator(b -> b.streamName(streamName) + .shardIteratorType(ShardIteratorType.TRIM_HORIZON) + .shardId(sh)) + .shardIterator()) + .flatMap(it -> kinesisClient.getRecords(b -> b.shardIterator(it)).records().stream()) + .map(Record::data) + .map(BytesWrapper::asUtf8String) + .map(str -> Jsons.deserialize(str, KinesisRecord.class)) + .collect(Collectors.toList()); + } + + /** + * Flush all records previously buffered to increase throughput and performance. Records are grouped + * by stream name and are sent for each stream separately. + * + * @param exceptionConsumer for handling errors related to flushing data per stream, rethrowing an + * exception in the consumer will stop the sync and clear the cache + */ + public void flush(Consumer exceptionConsumer) { + try { + buffer.stream() + .collect(Collectors.groupingBy(Tuple::value1, Collectors.mapping(Tuple::value2, Collectors.toList()))) + .forEach((k, v) -> { + var records = v.stream().map(entry -> PutRecordsRequestEntry.builder() + // partition key used to determine stream shard. 
+ .partitionKey(entry.value1()) + .data(SdkBytes.fromUtf8String(entry.value2())) + .build()) + .collect(Collectors.toList()); + try { + kinesisClient.putRecords(b -> b.streamName(k).records(records)); + } catch (Exception e) { + exceptionConsumer.accept(e); + } + }); + } finally { + buffer.clear(); + } + } + + /** + * Return the kinesis client to the pool to be closed if no longer used. + */ + @Override + public void close() { + KinesisClientPool.closeClient(kinesisConfig); + } + +} diff --git a/airbyte-integrations/connectors/destination-kinesis/src/main/java/io/airbyte/integrations/destination/kinesis/KinesisStreamConfig.java b/airbyte-integrations/connectors/destination-kinesis/src/main/java/io/airbyte/integrations/destination/kinesis/KinesisStreamConfig.java new file mode 100644 index 0000000..868ef0c --- /dev/null +++ b/airbyte-integrations/connectors/destination-kinesis/src/main/java/io/airbyte/integrations/destination/kinesis/KinesisStreamConfig.java @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.kinesis; + +import io.airbyte.protocol.models.v0.DestinationSyncMode; + +/** + * KinesisStreamConfig class for storing configuration data for every stream. + */ +public class KinesisStreamConfig { + + private final String streamName; + + private final DestinationSyncMode destinationSyncMode; + + public KinesisStreamConfig(String streamName, DestinationSyncMode destinationSyncMode) { + this.streamName = streamName; + this.destinationSyncMode = destinationSyncMode; + } + + public String getStreamName() { + return streamName; + } + + public DestinationSyncMode getDestinationSyncMode() { + return destinationSyncMode; + } + + @Override + public String toString() { + return "KinesisStreamConfig{" + + "streamName='" + streamName + '\'' + + ", destinationSyncMode=" + destinationSyncMode + + '}'; + } + +} diff --git a/airbyte-integrations/connectors/destination-kinesis/src/main/java/io/airbyte/integrations/destination/kinesis/KinesisUtils.java b/airbyte-integrations/connectors/destination-kinesis/src/main/java/io/airbyte/integrations/destination/kinesis/KinesisUtils.java new file mode 100644 index 0000000..c002f74 --- /dev/null +++ b/airbyte-integrations/connectors/destination-kinesis/src/main/java/io/airbyte/integrations/destination/kinesis/KinesisUtils.java @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.kinesis; + +import java.util.UUID; +import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; +import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.kinesis.KinesisClient; +import software.amazon.awssdk.services.kinesis.model.KinesisException; + +/** + * KinesisUtils class providing utility methods for various Kinesis functionalities. + */ +public class KinesisUtils { + + private KinesisUtils() { + + } + + /** + * Configures and returns a Kinesis client with the provided configuration. + * + * @param kinesisConfig used to configure the Kinesis client. + * @return KinesisClient which can be used to access Kinesis. 
+ */ + static KinesisClient buildKinesisClient(KinesisConfig kinesisConfig) { + var kinesisClientBuilder = KinesisClient.builder(); + + // configure access credentials + kinesisClientBuilder.credentialsProvider(StaticCredentialsProvider.create( + AwsBasicCredentials.create(kinesisConfig.getAccessKey(), kinesisConfig.getPrivateKey()))); + + if (kinesisConfig.getRegion() != null && !kinesisConfig.getRegion().isBlank()) { + // configure access region + kinesisClientBuilder.region(Region.of(kinesisConfig.getRegion())); + } + + if (kinesisConfig.getEndpoint() != null) { + // configure access endpoint + kinesisClientBuilder.endpointOverride(kinesisConfig.getEndpoint()); + } + + return kinesisClientBuilder.build(); + } + + /** + * Build a Kinesis exception with the provided message and cause. + * + * @param message of the exception + * @param cause of the exception + * @return KinesisException to be thrown + */ + static KinesisException buildKinesisException(String message, Throwable cause) { + return (KinesisException) KinesisException.builder() + .message(message) + .cause(cause) + .build(); + } + + /** + * Create random UUID which can be used as a partition key for streaming data. + * + * @return String partition key for distributing data across shards. + */ + static String buildPartitionKey() { + return UUID.randomUUID().toString(); + } + +} diff --git a/airbyte-integrations/connectors/destination-kinesis/src/main/java/io/airbyte/integrations/destination/kinesis/Tuple.java b/airbyte-integrations/connectors/destination-kinesis/src/main/java/io/airbyte/integrations/destination/kinesis/Tuple.java new file mode 100644 index 0000000..3a3a853 --- /dev/null +++ b/airbyte-integrations/connectors/destination-kinesis/src/main/java/io/airbyte/integrations/destination/kinesis/Tuple.java @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.kinesis; + +/** + * Tuple class for wrapping a pair od objects. 
+ */ +public class Tuple { + + private final V1 value1; + + private final V2 value2; + + public Tuple(V1 value1, V2 value2) { + this.value1 = value1; + this.value2 = value2; + } + + public static Tuple of(V1 value1, V2 value2) { + return new Tuple<>(value1, value2); + } + + public V1 value1() { + return value1; + } + + public V2 value2() { + return value2; + } + + @Override + public String toString() { + return "Tuple{" + + "value1=" + value1 + + ", value2=" + value2 + + '}'; + } + +} diff --git a/airbyte-integrations/connectors/destination-kinesis/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-kinesis/src/main/resources/spec.json new file mode 100644 index 0000000..3667ed0 --- /dev/null +++ b/airbyte-integrations/connectors/destination-kinesis/src/main/resources/spec.json @@ -0,0 +1,67 @@ +{ + "documentationUrl": "https://docs.airbyte.com/integrations/destinations/kinesis", + "supportsIncremental": true, + "supportsNormalization": false, + "supportsDBT": false, + "supported_destination_sync_modes": ["append"], + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Kinesis Destination Spec", + "type": "object", + "required": [ + "endpoint", + "region", + "shardCount", + "accessKey", + "privateKey", + "bufferSize" + ], + "additionalProperties": true, + "properties": { + "endpoint": { + "title": "Endpoint", + "description": "AWS Kinesis endpoint.", + "type": "string", + "examples": ["kinesis.us‑west‑1.amazonaws.com"], + "order": 0 + }, + "region": { + "title": "Region", + "description": "AWS region. Your account determines the Regions that are available to you.", + "type": "string", + "examples": ["us‑west‑1"], + "order": 1 + }, + "shardCount": { + "title": "Shard Count", + "description": "Number of shards to which the data should be streamed.", + "type": "integer", + "default": 5, + "order": 2 + }, + "accessKey": { + "title": "Access Key", + "description": "Generate the AWS Access Key for current user.", + "airbyte_secret": true, + "type": "string", + "order": 3 + }, + "privateKey": { + "title": "Private Key", + "description": "The AWS Private Key - a string of numbers and letters that are unique for each account, also known as a \"recovery phrase\".", + "airbyte_secret": true, + "type": "string", + "order": 4 + }, + "bufferSize": { + "title": "Buffer Size", + "description": "Buffer size for storing kinesis records before being batch streamed.", + "type": "integer", + "minimum": 1, + "maximum": 500, + "default": 100, + "order": 5 + } + } + } +} diff --git a/airbyte-integrations/connectors/destination-kinesis/src/test-integration/java/io/airbyte/integrations/destination/kinesis/KinesisContainerInitializr.java b/airbyte-integrations/connectors/destination-kinesis/src/test-integration/java/io/airbyte/integrations/destination/kinesis/KinesisContainerInitializr.java new file mode 100644 index 0000000..1c98d55 --- /dev/null +++ b/airbyte-integrations/connectors/destination-kinesis/src/test-integration/java/io/airbyte/integrations/destination/kinesis/KinesisContainerInitializr.java @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.kinesis; + +import java.net.URI; +import org.testcontainers.containers.localstack.LocalStackContainer; +import org.testcontainers.utility.DockerImageName; + +public class KinesisContainerInitializr { + + private static KinesisContainer kinesisContainer; + + private KinesisContainerInitializr() { + + } + + static KinesisContainer initContainer() { + if (kinesisContainer == null) { + kinesisContainer = KinesisContainer.createContainer(); + } + kinesisContainer.start(); + return kinesisContainer; + } + + static class KinesisContainer extends LocalStackContainer { + + private KinesisContainer() { + super(DockerImageName.parse("localstack/localstack:0.12.20")); + } + + static KinesisContainer createContainer() { + return (KinesisContainer) new KinesisContainer() + .withServices(Service.KINESIS) + // lower kinesis response latency to 200 ms to speed up tests + .withEnv("KINESIS_LATENCY", "200") + // increase default shard limit + .withEnv("KINESIS_SHARD_LIMIT", "500"); + } + + URI getEndpointOverride() { + return super.getEndpointOverride(LocalStackContainer.Service.KINESIS); + } + + } + +} diff --git a/airbyte-integrations/connectors/destination-kinesis/src/test-integration/java/io/airbyte/integrations/destination/kinesis/KinesisDataFactory.java b/airbyte-integrations/connectors/destination-kinesis/src/test-integration/java/io/airbyte/integrations/destination/kinesis/KinesisDataFactory.java new file mode 100644 index 0000000..c481cc4 --- /dev/null +++ b/airbyte-integrations/connectors/destination-kinesis/src/test-integration/java/io/airbyte/integrations/destination/kinesis/KinesisDataFactory.java @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.kinesis; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import io.airbyte.commons.json.Jsons; + +public class KinesisDataFactory { + + private KinesisDataFactory() { + + } + + static JsonNode jsonConfig(String endpoint, String region, String accessKey, String privateKey) { + return Jsons.jsonNode(ImmutableMap.builder() + .put("endpoint", endpoint) + .put("region", region) + .put("shardCount", 5) + .put("accessKey", accessKey) + .put("privateKey", privateKey) + .put("bufferSize", 100) + .build()); + } + +} diff --git a/airbyte-integrations/connectors/destination-kinesis/src/test-integration/java/io/airbyte/integrations/destination/kinesis/KinesisDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-kinesis/src/test-integration/java/io/airbyte/integrations/destination/kinesis/KinesisDestinationAcceptanceTest.java new file mode 100644 index 0000000..0a30d35 --- /dev/null +++ b/airbyte-integrations/connectors/destination-kinesis/src/test-integration/java/io/airbyte/integrations/destination/kinesis/KinesisDestinationAcceptanceTest.java @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.kinesis; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.cdk.integrations.standardtest.destination.DestinationAcceptanceTest; +import io.airbyte.cdk.integrations.standardtest.destination.comparator.AdvancedTestDataComparator; +import io.airbyte.cdk.integrations.standardtest.destination.comparator.TestDataComparator; +import io.airbyte.commons.json.Jsons; +import java.util.Comparator; +import java.util.HashSet; +import java.util.List; +import java.util.stream.Collectors; +import org.junit.jupiter.api.BeforeAll; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class KinesisDestinationAcceptanceTest extends DestinationAcceptanceTest { + + private static final Logger LOGGER = LoggerFactory.getLogger(KinesisDestinationAcceptanceTest.class); + + private JsonNode configJson; + + private KinesisStream kinesisStream; + + private KinesisNameTransformer kinesisNameTransformer; + + private static KinesisContainerInitializr.KinesisContainer kinesisContainer; + + @BeforeAll + static void initContainer() { + kinesisContainer = KinesisContainerInitializr.initContainer(); + } + + @Override + protected void setup(final TestDestinationEnv testEnv, final HashSet<String> TEST_SCHEMAS) { + configJson = KinesisDataFactory.jsonConfig( + kinesisContainer.getEndpointOverride().toString(), + kinesisContainer.getRegion(), + kinesisContainer.getAccessKey(), + kinesisContainer.getSecretKey()); + kinesisStream = new KinesisStream(new KinesisConfig(configJson)); + kinesisNameTransformer = new KinesisNameTransformer(); + } + + @Override + protected TestDataComparator getTestDataComparator() { + return new AdvancedTestDataComparator(); + } + + @Override + protected boolean supportBasicDataTypeTest() { + return true; + } + + @Override + protected boolean supportArrayDataTypeTest() { + return true; + } + + @Override + protected boolean supportObjectDataTypeTest() { + return true; + } + + @Override + protected void tearDown(final TestDestinationEnv testEnv) { + kinesisStream.deleteAllStreams(); + } + + @Override + protected String getImageName() { + return "airbyte/destination-kinesis:dev"; + } + + @Override + protected boolean implementsNamespaces() { + return true; + } + + @Override + protected JsonNode getConfig() { + return configJson; + } + + @Override + protected JsonNode getFailCheckConfig() { + return KinesisDataFactory.jsonConfig( + "127.0.0.9", + "eu-west-1", + "random_access_key", + "random_secret_key"); + } + + @Override + protected List<JsonNode> retrieveRecords(final TestDestinationEnv testEnv, + final String streamName, + final String namespace, + final JsonNode streamSchema) { + final var stream = kinesisNameTransformer.streamName(namespace, streamName); + return kinesisStream.getRecords(stream).stream() + .sorted(Comparator.comparing(KinesisRecord::getTimestamp)) + .map(KinesisRecord::getData) + .map(Jsons::deserialize) + .collect(Collectors.toList()); + } + +} diff --git a/airbyte-integrations/connectors/destination-kinesis/src/test-integration/java/io/airbyte/integrations/destination/kinesis/KinesisDestinationTest.java b/airbyte-integrations/connectors/destination-kinesis/src/test-integration/java/io/airbyte/integrations/destination/kinesis/KinesisDestinationTest.java new file mode 100644 index 0000000..18cdba0 --- /dev/null +++ b/airbyte-integrations/connectors/destination-kinesis/src/test-integration/java/io/airbyte/integrations/destination/kinesis/KinesisDestinationTest.java @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2023 Airbyte,
Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.kinesis; + +import static org.assertj.core.api.Assertions.assertThat; + +import io.airbyte.protocol.models.v0.AirbyteConnectionStatus; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +class KinesisDestinationTest { + + private static KinesisContainerInitializr.KinesisContainer kinesisContainer; + + private KinesisDestination kinesisDestination; + + @BeforeAll + static void setup() { + kinesisContainer = KinesisContainerInitializr.initContainer(); + } + + @BeforeEach + void init() { + this.kinesisDestination = new KinesisDestination(); + } + + @Test + void testCheckConnectionWithSuccess() { + + var jsonConfig = KinesisDataFactory.jsonConfig( + kinesisContainer.getEndpointOverride().toString(), + kinesisContainer.getRegion(), + kinesisContainer.getAccessKey(), + kinesisContainer.getSecretKey()); + + var connectionStatus = kinesisDestination.check(jsonConfig); + + assertThat(connectionStatus.getStatus()).isEqualTo(AirbyteConnectionStatus.Status.SUCCEEDED); + } + + @Test + void testCheckTestConnectionWithFailure() { + + var jsonConfig = KinesisDataFactory.jsonConfig( + "127.0.0.9", + "eu-west-1", + "random_access_key", + "random_secret_key"); + + var connectionStatus = kinesisDestination.check(jsonConfig); + + assertThat(connectionStatus.getStatus()).isEqualTo(AirbyteConnectionStatus.Status.FAILED); + + } + +} diff --git a/airbyte-integrations/connectors/destination-kinesis/src/test-integration/java/io/airbyte/integrations/destination/kinesis/KinesisStreamTest.java b/airbyte-integrations/connectors/destination-kinesis/src/test-integration/java/io/airbyte/integrations/destination/kinesis/KinesisStreamTest.java new file mode 100644 index 0000000..b2a07d9 --- /dev/null +++ b/airbyte-integrations/connectors/destination-kinesis/src/test-integration/java/io/airbyte/integrations/destination/kinesis/KinesisStreamTest.java @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.kinesis; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import io.airbyte.commons.json.Jsons; +import java.time.Instant; +import java.util.Map; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import software.amazon.awssdk.services.kinesis.model.ResourceNotFoundException; + +class KinesisStreamTest { + + private static KinesisContainerInitializr.KinesisContainer kinesisContainer; + + private KinesisStream kinesisStream; + + @BeforeAll + static void setup() { + kinesisContainer = KinesisContainerInitializr.initContainer(); + } + + @BeforeEach + void init() { + var jsonConfig = KinesisDataFactory.jsonConfig( + kinesisContainer.getEndpointOverride().toString(), + kinesisContainer.getRegion(), + kinesisContainer.getAccessKey(), + kinesisContainer.getSecretKey()); + this.kinesisStream = new KinesisStream(new KinesisConfig(jsonConfig)); + } + + @AfterEach + void cleanup() { + kinesisStream.deleteAllStreams(); + } + + @Test + void testCreateStream() { + String streamName = "test_create_stream"; + // given + kinesisStream.createStream(streamName); + kinesisStream.flush(e -> {}); + // when + var records = kinesisStream.getRecords(streamName); + + // then + assertThat(records) + .isNotNull() + .hasSize(0); + + } + + @Test + void testDeleteStream() { + String streamName = "test_delete_stream"; + // given + kinesisStream.createStream(streamName); + + // when + kinesisStream.deleteStream(streamName); + + // then + assertThrows(ResourceNotFoundException.class, () -> kinesisStream.getRecords(streamName)); + } + + @Test + void testDeleteAllStreams() { + var streamName1 = "test_delete_all_stream1"; + var streamName2 = "test_delete_all_stream2"; + // given + kinesisStream.createStream(streamName1); + kinesisStream.createStream(streamName2); + + // when + kinesisStream.deleteAllStreams(); + + // then + assertThrows(ResourceNotFoundException.class, () -> kinesisStream.getRecords(streamName1)); + assertThrows(ResourceNotFoundException.class, () -> kinesisStream.getRecords(streamName2)); + + } + + @Test + void testPutRecordAndFlush() { + // given + String streamName = "test_put_record_stream"; + kinesisStream.createStream(streamName); + + var partitionKey1 = KinesisUtils.buildPartitionKey(); + kinesisStream.putRecord(streamName, partitionKey1, createData(partitionKey1, "{\"property\":\"data1\"}"), + e -> {}); + + var partitionKey2 = KinesisUtils.buildPartitionKey(); + kinesisStream.putRecord(streamName, partitionKey2, createData(partitionKey2, "{\"property\":\"data2\"}"), + e -> {}); + + kinesisStream.flush(e -> {}); + + // when + var records = kinesisStream.getRecords(streamName); + + // then + assertThat(records) + .isNotNull() + .hasSize(2) + .anyMatch(r -> r.getData().equals("{\"property\":\"data1\"}")) + .anyMatch(r -> r.getData().equals("{\"property\":\"data2\"}")); + } + + private String createData(String partitionKey, String data) { + var kinesisRecord = Jsons.jsonNode(Map.of( + KinesisRecord.COLUMN_NAME_AB_ID, partitionKey, + KinesisRecord.COLUMN_NAME_DATA, data, + KinesisRecord.COLUMN_NAME_EMITTED_AT, Instant.now())); + return Jsons.serialize(kinesisRecord); + } + +} diff --git a/airbyte-integrations/connectors/destination-kinesis/src/test/java/io/airbyte/integrations/destination/kinesis/KinesisConfigTest.java 
b/airbyte-integrations/connectors/destination-kinesis/src/test/java/io/airbyte/integrations/destination/kinesis/KinesisConfigTest.java new file mode 100644 index 0000000..4286645 --- /dev/null +++ b/airbyte-integrations/connectors/destination-kinesis/src/test/java/io/airbyte/integrations/destination/kinesis/KinesisConfigTest.java @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.kinesis; + +import static org.assertj.core.api.Assertions.assertThat; + +import java.net.URI; +import java.net.URISyntaxException; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +class KinesisConfigTest { + + private KinesisConfig kinesisConfig; + + @BeforeEach + void setup() { + var jsonConfig = KinesisDataFactory.jsonConfig( + "http://aws.kinesis.com", + "eu-west-1", + "random_access_key", + "random_secret_key"); + this.kinesisConfig = new KinesisConfig(jsonConfig); + } + + @Test + void testConfig() throws URISyntaxException { + + assertThat(kinesisConfig) + .hasFieldOrPropertyWithValue("endpoint", new URI("http://aws.kinesis.com")) + .hasFieldOrPropertyWithValue("region", "eu-west-1") + .hasFieldOrPropertyWithValue("shardCount", 5) + .hasFieldOrPropertyWithValue("accessKey", "random_access_key") + .hasFieldOrPropertyWithValue("privateKey", "random_secret_key") + .hasFieldOrPropertyWithValue("bufferSize", 100); + + } + +} diff --git a/airbyte-integrations/connectors/destination-kinesis/src/test/java/io/airbyte/integrations/destination/kinesis/KinesisDataFactory.java b/airbyte-integrations/connectors/destination-kinesis/src/test/java/io/airbyte/integrations/destination/kinesis/KinesisDataFactory.java new file mode 100644 index 0000000..c481cc4 --- /dev/null +++ b/airbyte-integrations/connectors/destination-kinesis/src/test/java/io/airbyte/integrations/destination/kinesis/KinesisDataFactory.java @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.kinesis; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import io.airbyte.commons.json.Jsons; + +public class KinesisDataFactory { + + private KinesisDataFactory() { + + } + + static JsonNode jsonConfig(String endpoint, String region, String accessKey, String privateKey) { + return Jsons.jsonNode(ImmutableMap.builder() + .put("endpoint", endpoint) + .put("region", region) + .put("shardCount", 5) + .put("accessKey", accessKey) + .put("privateKey", privateKey) + .put("bufferSize", 100) + .build()); + } + +} diff --git a/airbyte-integrations/connectors/destination-kinesis/src/test/java/io/airbyte/integrations/destination/kinesis/KinesisNameTransformerTest.java b/airbyte-integrations/connectors/destination-kinesis/src/test/java/io/airbyte/integrations/destination/kinesis/KinesisNameTransformerTest.java new file mode 100644 index 0000000..451caad --- /dev/null +++ b/airbyte-integrations/connectors/destination-kinesis/src/test/java/io/airbyte/integrations/destination/kinesis/KinesisNameTransformerTest.java @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.kinesis; + +import static org.assertj.core.api.Assertions.assertThat; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +class KinesisNameTransformerTest { + + private KinesisNameTransformer kinesisNameTransformer; + + @BeforeEach + void setup() { + this.kinesisNameTransformer = new KinesisNameTransformer(); + } + + @Test + void outputStream() { + + var column = kinesisNameTransformer.streamName("namespace", "stream"); + + assertThat(column).matches("namespace_stream"); + + } + + @Test + void outputStreamConvert() { + + var keyspace = kinesisNameTransformer.streamName("**namespace^h", "##stream"); + + assertThat(keyspace).matches("__namespace_h___stream"); + + } + +} diff --git a/airbyte-integrations/connectors/destination-kinesis/src/test/java/io/airbyte/integrations/destination/kinesis/KinesisRecordConsumerTest.java b/airbyte-integrations/connectors/destination-kinesis/src/test/java/io/airbyte/integrations/destination/kinesis/KinesisRecordConsumerTest.java new file mode 100644 index 0000000..f2ac46a --- /dev/null +++ b/airbyte-integrations/connectors/destination-kinesis/src/test/java/io/airbyte/integrations/destination/kinesis/KinesisRecordConsumerTest.java @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.kinesis; + +import io.airbyte.cdk.integrations.base.FailureTrackingAirbyteMessageConsumer; +import io.airbyte.cdk.integrations.standardtest.destination.PerStreamStateMessageTest; +import io.airbyte.protocol.models.v0.AirbyteMessage; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import java.util.function.Consumer; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; + +@DisplayName("KinesisRecordConsumer") +@ExtendWith(MockitoExtension.class) +public class KinesisRecordConsumerTest extends PerStreamStateMessageTest { + + @Mock + private Consumer<AirbyteMessage> outputRecordCollector; + + @Mock + private ConfiguredAirbyteCatalog catalog; + @Mock + private KinesisStream kinesisStream; + + private KinesisMessageConsumer consumer; + + @BeforeEach + public void init() { + consumer = new KinesisMessageConsumer(catalog, kinesisStream, outputRecordCollector); + } + + @Override + protected Consumer<AirbyteMessage> getMockedConsumer() { + return outputRecordCollector; + } + + @Override + protected FailureTrackingAirbyteMessageConsumer getMessageConsumer() { + return consumer; + } + +} diff --git a/airbyte-integrations/connectors/destination-kvdb/metadata.yaml b/airbyte-integrations/connectors/destination-kvdb/metadata.yaml index 9c553db..64a38cf 100644 --- a/airbyte-integrations/connectors/destination-kvdb/metadata.yaml +++ b/airbyte-integrations/connectors/destination-kvdb/metadata.yaml @@ -4,10 +4,12 @@ data: enabled: false oss: enabled: false + connectorBuildOptions: + baseImage: docker.io/airbyte/python-connector-base:1.2.0@sha256:c22a9d97464b69d6ef01898edf3f8612dc11614f05a84984451dde195f337db9 connectorSubtype: api connectorType: destination definitionId: f2e549cd-8e2a-48f8-822d-cc13630eb42d - dockerImageTag: 0.1.2 + dockerImageTag: 0.1.3 dockerRepository: airbyte/destination-kvdb githubIssueLabel: destination-kvdb icon: kvdb.svg diff --git a/airbyte-integrations/connectors/destination-kvdb/poetry.lock b/airbyte-integrations/connectors/destination-kvdb/poetry.lock new file mode
100644 index 0000000..7835868 --- /dev/null +++ b/airbyte-integrations/connectors/destination-kvdb/poetry.lock @@ -0,0 +1,1108 @@ +# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand. + +[[package]] +name = "airbyte-cdk" +version = "0.62.2" +description = "A framework for writing Airbyte Connectors." +optional = false +python-versions = ">=3.8" +files = [ + {file = "airbyte-cdk-0.62.2.tar.gz", hash = "sha256:bf45cb847e2d2ab7063d0e1989f6c9cf022771c6ae4fb1e854438c3b8377da85"}, + {file = "airbyte_cdk-0.62.2-py3-none-any.whl", hash = "sha256:6d04d2e8a9a32aa707ddf27a1916ac76969fb50ac39d60582ad2daa08ef832ef"}, +] + +[package.dependencies] +airbyte-protocol-models = "0.5.1" +backoff = "*" +cachetools = "*" +Deprecated = ">=1.2,<2.0" +dpath = ">=2.0.1,<2.1.0" +genson = "1.2.2" +isodate = ">=0.6.1,<0.7.0" +Jinja2 = ">=3.1.2,<3.2.0" +jsonref = ">=0.2,<1.0" +jsonschema = ">=3.2.0,<3.3.0" +pendulum = "<3.0.0" +pydantic = ">=1.10.8,<2.0.0" +pyrate-limiter = ">=3.1.0,<3.2.0" +python-dateutil = "*" +PyYAML = ">=6.0.1" +requests = "*" +requests-cache = "*" +wcmatch = "8.4" + +[package.extras] +dev = ["avro (>=1.11.2,<1.12.0)", "cohere (==4.21)", "fastavro (>=1.8.0,<1.9.0)", "freezegun", "langchain (==0.0.271)", "markdown", "mypy", "openai[embeddings] (==0.27.9)", "pandas (==2.0.3)", "pdf2image (==1.16.3)", "pdfminer.six (==20221105)", "pyarrow (==12.0.1)", "pytesseract (==0.3.10)", "pytest", "pytest-cov", "pytest-httpserver", "pytest-mock", "requests-mock", "tiktoken (==0.4.0)", "unstructured (==0.10.27)", "unstructured.pytesseract (>=0.3.12)", "unstructured[docx,pptx] (==0.10.27)"] +file-based = ["avro (>=1.11.2,<1.12.0)", "fastavro (>=1.8.0,<1.9.0)", "markdown", "pdf2image (==1.16.3)", "pdfminer.six (==20221105)", "pyarrow (==12.0.1)", "pytesseract (==0.3.10)", "unstructured (==0.10.27)", "unstructured.pytesseract (>=0.3.12)", "unstructured[docx,pptx] (==0.10.27)"] +sphinx-docs = ["Sphinx (>=4.2,<5.0)", "sphinx-rtd-theme (>=1.0,<2.0)"] +vector-db-based = ["cohere (==4.21)", "langchain (==0.0.271)", "openai[embeddings] (==0.27.9)", "tiktoken (==0.4.0)"] + +[[package]] +name = "airbyte-protocol-models" +version = "0.5.1" +description = "Declares the Airbyte Protocol." +optional = false +python-versions = ">=3.8" +files = [ + {file = "airbyte_protocol_models-0.5.1-py3-none-any.whl", hash = "sha256:dfe84e130e51ce2ae81a06d5aa36f6c5ce3152b9e36e6f0195fad6c3dab0927e"}, + {file = "airbyte_protocol_models-0.5.1.tar.gz", hash = "sha256:7c8b16c7c1c7956b1996052e40585a3a93b1e44cb509c4e97c1ee4fe507ea086"}, +] + +[package.dependencies] +pydantic = ">=1.9.2,<2.0.0" + +[[package]] +name = "atomicwrites" +version = "1.4.1" +description = "Atomic file writes." 
+optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "atomicwrites-1.4.1.tar.gz", hash = "sha256:81b2c9071a49367a7f770170e5eec8cb66567cfbbc8c73d20ce5ca4a8d71cf11"}, +] + +[[package]] +name = "attrs" +version = "23.2.0" +description = "Classes Without Boilerplate" +optional = false +python-versions = ">=3.7" +files = [ + {file = "attrs-23.2.0-py3-none-any.whl", hash = "sha256:99b87a485a5820b23b879f04c2305b44b951b502fd64be915879d77a7e8fc6f1"}, + {file = "attrs-23.2.0.tar.gz", hash = "sha256:935dc3b529c262f6cf76e50877d35a4bd3c1de194fd41f47a2b7ae8f19971f30"}, +] + +[package.extras] +cov = ["attrs[tests]", "coverage[toml] (>=5.3)"] +dev = ["attrs[tests]", "pre-commit"] +docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope-interface"] +tests = ["attrs[tests-no-zope]", "zope-interface"] +tests-mypy = ["mypy (>=1.6)", "pytest-mypy-plugins"] +tests-no-zope = ["attrs[tests-mypy]", "cloudpickle", "hypothesis", "pympler", "pytest (>=4.3.0)", "pytest-xdist[psutil]"] + +[[package]] +name = "backoff" +version = "2.2.1" +description = "Function decoration for backoff and retry" +optional = false +python-versions = ">=3.7,<4.0" +files = [ + {file = "backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8"}, + {file = "backoff-2.2.1.tar.gz", hash = "sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba"}, +] + +[[package]] +name = "bracex" +version = "2.4" +description = "Bash style brace expander." +optional = false +python-versions = ">=3.8" +files = [ + {file = "bracex-2.4-py3-none-any.whl", hash = "sha256:efdc71eff95eaff5e0f8cfebe7d01adf2c8637c8c92edaf63ef348c241a82418"}, + {file = "bracex-2.4.tar.gz", hash = "sha256:a27eaf1df42cf561fed58b7a8f3fdf129d1ea16a81e1fadd1d17989bc6384beb"}, +] + +[[package]] +name = "cachetools" +version = "5.3.2" +description = "Extensible memoizing collections and decorators" +optional = false +python-versions = ">=3.7" +files = [ + {file = "cachetools-5.3.2-py3-none-any.whl", hash = "sha256:861f35a13a451f94e301ce2bec7cac63e881232ccce7ed67fab9b5df4d3beaa1"}, + {file = "cachetools-5.3.2.tar.gz", hash = "sha256:086ee420196f7b2ab9ca2db2520aca326318b68fe5ba8bc4d49cca91add450f2"}, +] + +[[package]] +name = "cattrs" +version = "23.2.3" +description = "Composable complex class support for attrs and dataclasses." +optional = false +python-versions = ">=3.8" +files = [ + {file = "cattrs-23.2.3-py3-none-any.whl", hash = "sha256:0341994d94971052e9ee70662542699a3162ea1e0c62f7ce1b4a57f563685108"}, + {file = "cattrs-23.2.3.tar.gz", hash = "sha256:a934090d95abaa9e911dac357e3a8699e0b4b14f8529bcc7d2b1ad9d51672b9f"}, +] + +[package.dependencies] +attrs = ">=23.1.0" +exceptiongroup = {version = ">=1.1.1", markers = "python_version < \"3.11\""} +typing-extensions = {version = ">=4.1.0,<4.6.3 || >4.6.3", markers = "python_version < \"3.11\""} + +[package.extras] +bson = ["pymongo (>=4.4.0)"] +cbor2 = ["cbor2 (>=5.4.6)"] +msgpack = ["msgpack (>=1.0.5)"] +orjson = ["orjson (>=3.9.2)"] +pyyaml = ["pyyaml (>=6.0)"] +tomlkit = ["tomlkit (>=0.11.8)"] +ujson = ["ujson (>=5.7.0)"] + +[[package]] +name = "certifi" +version = "2024.2.2" +description = "Python package for providing Mozilla's CA Bundle." 
+optional = false +python-versions = ">=3.6" +files = [ + {file = "certifi-2024.2.2-py3-none-any.whl", hash = "sha256:dc383c07b76109f368f6106eee2b593b04a011ea4d55f652c6ca24a754d1cdd1"}, + {file = "certifi-2024.2.2.tar.gz", hash = "sha256:0569859f95fc761b18b45ef421b1290a0f65f147e92a1e5eb3e635f9a5e4e66f"}, +] + +[[package]] +name = "charset-normalizer" +version = "3.3.2" +description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "charset-normalizer-3.3.2.tar.gz", hash = "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-win32.whl", hash = "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash 
= "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-win32.whl", hash = "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed"}, + {file = 
"charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-win32.whl", hash = "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-win32.whl", hash = "sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-win_amd64.whl", hash = "sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac"}, + {file = 
"charset_normalizer-3.3.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-win32.whl", hash = "sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185"}, + {file = 
"charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-win32.whl", hash = "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d"}, + {file = "charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"}, +] + +[[package]] +name = "colorama" +version = "0.4.6" +description = "Cross-platform colored terminal text." +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +files = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] + +[[package]] +name = "deprecated" +version = "1.2.14" +description = "Python @deprecated decorator to deprecate old python classes, functions or methods." 
+optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "Deprecated-1.2.14-py2.py3-none-any.whl", hash = "sha256:6fac8b097794a90302bdbb17b9b815e732d3c4720583ff1b198499d78470466c"}, + {file = "Deprecated-1.2.14.tar.gz", hash = "sha256:e5323eb936458dccc2582dc6f9c322c852a775a27065ff2b0c4970b9d53d01b3"}, +] + +[package.dependencies] +wrapt = ">=1.10,<2" + +[package.extras] +dev = ["PyTest", "PyTest-Cov", "bump2version (<1)", "sphinx (<2)", "tox"] + +[[package]] +name = "dpath" +version = "2.0.8" +description = "Filesystem-like pathing and searching for dictionaries" +optional = false +python-versions = ">=3.7" +files = [ + {file = "dpath-2.0.8-py3-none-any.whl", hash = "sha256:f92f595214dd93a00558d75d4b858beee519f4cffca87f02616ad6cd013f3436"}, + {file = "dpath-2.0.8.tar.gz", hash = "sha256:a3440157ebe80d0a3ad794f1b61c571bef125214800ffdb9afc9424e8250fe9b"}, +] + +[[package]] +name = "exceptiongroup" +version = "1.2.0" +description = "Backport of PEP 654 (exception groups)" +optional = false +python-versions = ">=3.7" +files = [ + {file = "exceptiongroup-1.2.0-py3-none-any.whl", hash = "sha256:4bfd3996ac73b41e9b9628b04e079f193850720ea5945fc96a08633c66912f14"}, + {file = "exceptiongroup-1.2.0.tar.gz", hash = "sha256:91f5c769735f051a4290d52edd0858999b57e5876e9f85937691bd4c9fa3ed68"}, +] + +[package.extras] +test = ["pytest (>=6)"] + +[[package]] +name = "freezegun" +version = "1.4.0" +description = "Let your Python tests travel through time" +optional = false +python-versions = ">=3.7" +files = [ + {file = "freezegun-1.4.0-py3-none-any.whl", hash = "sha256:55e0fc3c84ebf0a96a5aa23ff8b53d70246479e9a68863f1fcac5a3e52f19dd6"}, + {file = "freezegun-1.4.0.tar.gz", hash = "sha256:10939b0ba0ff5adaecf3b06a5c2f73071d9678e507c5eaedb23c761d56ac774b"}, +] + +[package.dependencies] +python-dateutil = ">=2.7" + +[[package]] +name = "genson" +version = "1.2.2" +description = "GenSON is a powerful, user-friendly JSON Schema generator." +optional = false +python-versions = "*" +files = [ + {file = "genson-1.2.2.tar.gz", hash = "sha256:8caf69aa10af7aee0e1a1351d1d06801f4696e005f06cedef438635384346a16"}, +] + +[[package]] +name = "graphql-core" +version = "3.2.3" +description = "GraphQL implementation for Python, a port of GraphQL.js, the JavaScript reference implementation for GraphQL." 
+optional = false +python-versions = ">=3.6,<4" +files = [ + {file = "graphql-core-3.2.3.tar.gz", hash = "sha256:06d2aad0ac723e35b1cb47885d3e5c45e956a53bc1b209a9fc5369007fe46676"}, + {file = "graphql_core-3.2.3-py3-none-any.whl", hash = "sha256:5766780452bd5ec8ba133f8bf287dc92713e3868ddd83aee4faab9fc3e303dc3"}, +] + +[[package]] +name = "idna" +version = "3.6" +description = "Internationalized Domain Names in Applications (IDNA)" +optional = false +python-versions = ">=3.5" +files = [ + {file = "idna-3.6-py3-none-any.whl", hash = "sha256:c05567e9c24a6b9faaa835c4821bad0590fbb9d5779e7caa6e1cc4978e7eb24f"}, + {file = "idna-3.6.tar.gz", hash = "sha256:9ecdbbd083b06798ae1e86adcbfe8ab1479cf864e4ee30fe4e46a003d12491ca"}, +] + +[[package]] +name = "iniconfig" +version = "2.0.0" +description = "brain-dead simple config-ini parsing" +optional = false +python-versions = ">=3.7" +files = [ + {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, + {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, +] + +[[package]] +name = "isodate" +version = "0.6.1" +description = "An ISO 8601 date/time/duration parser and formatter" +optional = false +python-versions = "*" +files = [ + {file = "isodate-0.6.1-py2.py3-none-any.whl", hash = "sha256:0751eece944162659049d35f4f549ed815792b38793f07cf73381c1c87cbed96"}, + {file = "isodate-0.6.1.tar.gz", hash = "sha256:48c5881de7e8b0a0d648cb024c8062dc84e7b840ed81e864c7614fd3c127bde9"}, +] + +[package.dependencies] +six = "*" + +[[package]] +name = "jinja2" +version = "3.1.3" +description = "A very fast and expressive template engine." +optional = false +python-versions = ">=3.7" +files = [ + {file = "Jinja2-3.1.3-py3-none-any.whl", hash = "sha256:7d6d50dd97d52cbc355597bd845fabfbac3f551e1f99619e39a35ce8c370b5fa"}, + {file = "Jinja2-3.1.3.tar.gz", hash = "sha256:ac8bd6544d4bb2c9792bf3a159e80bba8fda7f07e81bc3aed565432d5925ba90"}, +] + +[package.dependencies] +MarkupSafe = ">=2.0" + +[package.extras] +i18n = ["Babel (>=2.7)"] + +[[package]] +name = "jsonref" +version = "0.3.0" +description = "jsonref is a library for automatic dereferencing of JSON Reference objects for Python." +optional = false +python-versions = ">=3.3,<4.0" +files = [ + {file = "jsonref-0.3.0-py3-none-any.whl", hash = "sha256:9480ad1b500f7e795daeb0ef29f9c55ae3a9ab38fb8d6659b6f4868acb5a5bc8"}, + {file = "jsonref-0.3.0.tar.gz", hash = "sha256:68b330c6815dc0d490dbb3d65ccda265ddde9f7856fd2f3322f971d456ea7549"}, +] + +[[package]] +name = "jsonschema" +version = "3.2.0" +description = "An implementation of JSON Schema validation for Python" +optional = false +python-versions = "*" +files = [ + {file = "jsonschema-3.2.0-py2.py3-none-any.whl", hash = "sha256:4e5b3cf8216f577bee9ce139cbe72eca3ea4f292ec60928ff24758ce626cd163"}, + {file = "jsonschema-3.2.0.tar.gz", hash = "sha256:c8a85b28d377cc7737e46e2d9f2b4f44ee3c0e1deac6bf46ddefc7187d30797a"}, +] + +[package.dependencies] +attrs = ">=17.4.0" +pyrsistent = ">=0.14.0" +setuptools = "*" +six = ">=1.11.0" + +[package.extras] +format = ["idna", "jsonpointer (>1.13)", "rfc3987", "strict-rfc3339", "webcolors"] +format-nongpl = ["idna", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3986-validator (>0.1.0)", "webcolors"] + +[[package]] +name = "markupsafe" +version = "2.1.5" +description = "Safely add untrusted strings to HTML/XML markup." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a17a92de5231666cfbe003f0e4b9b3a7ae3afb1ec2845aadc2bacc93ff85febc"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:72b6be590cc35924b02c78ef34b467da4ba07e4e0f0454a2c5907f473fc50ce5"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e61659ba32cf2cf1481e575d0462554625196a1f2fc06a1c777d3f48e8865d46"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2174c595a0d73a3080ca3257b40096db99799265e1c27cc5a610743acd86d62f"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae2ad8ae6ebee9d2d94b17fb62763125f3f374c25618198f40cbb8b525411900"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:075202fa5b72c86ad32dc7d0b56024ebdbcf2048c0ba09f1cde31bfdd57bcfff"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:598e3276b64aff0e7b3451b72e94fa3c238d452e7ddcd893c3ab324717456bad"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fce659a462a1be54d2ffcacea5e3ba2d74daa74f30f5f143fe0c58636e355fdd"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-win32.whl", hash = "sha256:d9fad5155d72433c921b782e58892377c44bd6252b5af2f67f16b194987338a4"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-win_amd64.whl", hash = "sha256:bf50cd79a75d181c9181df03572cdce0fbb75cc353bc350712073108cba98de5"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:629ddd2ca402ae6dbedfceeba9c46d5f7b2a61d9749597d4307f943ef198fc1f"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5b7b716f97b52c5a14bffdf688f971b2d5ef4029127f1ad7a513973cfd818df2"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ec585f69cec0aa07d945b20805be741395e28ac1627333b1c5b0105962ffced"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b91c037585eba9095565a3556f611e3cbfaa42ca1e865f7b8015fe5c7336d5a5"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7502934a33b54030eaf1194c21c692a534196063db72176b0c4028e140f8f32c"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0e397ac966fdf721b2c528cf028494e86172b4feba51d65f81ffd65c63798f3f"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c061bb86a71b42465156a3ee7bd58c8c2ceacdbeb95d05a99893e08b8467359a"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3a57fdd7ce31c7ff06cdfbf31dafa96cc533c21e443d57f5b1ecc6cdc668ec7f"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-win32.whl", hash = "sha256:397081c1a0bfb5124355710fe79478cdbeb39626492b15d399526ae53422b906"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-win_amd64.whl", hash = "sha256:2b7c57a4dfc4f16f7142221afe5ba4e093e09e728ca65c51f5620c9aaeb9a617"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:8dec4936e9c3100156f8a2dc89c4b88d5c435175ff03413b443469c7c8c5f4d1"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:3c6b973f22eb18a789b1460b4b91bf04ae3f0c4234a0a6aa6b0a92f6f7b951d4"}, + {file = 
"MarkupSafe-2.1.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ac07bad82163452a6884fe8fa0963fb98c2346ba78d779ec06bd7a6262132aee"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5dfb42c4604dddc8e4305050aa6deb084540643ed5804d7455b5df8fe16f5e5"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ea3d8a3d18833cf4304cd2fc9cbb1efe188ca9b5efef2bdac7adc20594a0e46b"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:d050b3361367a06d752db6ead6e7edeb0009be66bc3bae0ee9d97fb326badc2a"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:bec0a414d016ac1a18862a519e54b2fd0fc8bbfd6890376898a6c0891dd82e9f"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:58c98fee265677f63a4385256a6d7683ab1832f3ddd1e66fe948d5880c21a169"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-win32.whl", hash = "sha256:8590b4ae07a35970728874632fed7bd57b26b0102df2d2b233b6d9d82f6c62ad"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-win_amd64.whl", hash = "sha256:823b65d8706e32ad2df51ed89496147a42a2a6e01c13cfb6ffb8b1e92bc910bb"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c8b29db45f8fe46ad280a7294f5c3ec36dbac9491f2d1c17345be8e69cc5928f"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ec6a563cff360b50eed26f13adc43e61bc0c04d94b8be985e6fb24b81f6dcfdf"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a549b9c31bec33820e885335b451286e2969a2d9e24879f83fe904a5ce59d70a"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4f11aa001c540f62c6166c7726f71f7573b52c68c31f014c25cc7901deea0b52"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:7b2e5a267c855eea6b4283940daa6e88a285f5f2a67f2220203786dfa59b37e9"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:2d2d793e36e230fd32babe143b04cec8a8b3eb8a3122d2aceb4a371e6b09b8df"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:ce409136744f6521e39fd8e2a24c53fa18ad67aa5bc7c2cf83645cce5b5c4e50"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-win32.whl", hash = "sha256:4096e9de5c6fdf43fb4f04c26fb114f61ef0bf2e5604b6ee3019d51b69e8c371"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-win_amd64.whl", hash = "sha256:4275d846e41ecefa46e2015117a9f491e57a71ddd59bbead77e904dc02b1bed2"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:656f7526c69fac7f600bd1f400991cc282b417d17539a1b228617081106feb4a"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:97cafb1f3cbcd3fd2b6fbfb99ae11cdb14deea0736fc2b0952ee177f2b813a46"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f3fbcb7ef1f16e48246f704ab79d79da8a46891e2da03f8783a5b6fa41a9532"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa9db3f79de01457b03d4f01b34cf91bc0048eb2c3846ff26f66687c2f6d16ab"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffee1f21e5ef0d712f9033568f8344d5da8cc2869dbd08d87c84656e6a2d2f68"}, + {file = 
"MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:5dedb4db619ba5a2787a94d877bc8ffc0566f92a01c0ef214865e54ecc9ee5e0"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:30b600cf0a7ac9234b2638fbc0fb6158ba5bdcdf46aeb631ead21248b9affbc4"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8dd717634f5a044f860435c1d8c16a270ddf0ef8588d4887037c5028b859b0c3"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-win32.whl", hash = "sha256:daa4ee5a243f0f20d528d939d06670a298dd39b1ad5f8a72a4275124a7819eff"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-win_amd64.whl", hash = "sha256:619bc166c4f2de5caa5a633b8b7326fbe98e0ccbfacabd87268a2b15ff73a029"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7a68b554d356a91cce1236aa7682dc01df0edba8d043fd1ce607c49dd3c1edcf"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:db0b55e0f3cc0be60c1f19efdde9a637c32740486004f20d1cff53c3c0ece4d2"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e53af139f8579a6d5f7b76549125f0d94d7e630761a2111bc431fd820e163b8"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:17b950fccb810b3293638215058e432159d2b71005c74371d784862b7e4683f3"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4c31f53cdae6ecfa91a77820e8b151dba54ab528ba65dfd235c80b086d68a465"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:bff1b4290a66b490a2f4719358c0cdcd9bafb6b8f061e45c7a2460866bf50c2e"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:bc1667f8b83f48511b94671e0e441401371dfd0f0a795c7daa4a3cd1dde55bea"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5049256f536511ee3f7e1b3f87d1d1209d327e818e6ae1365e8653d7e3abb6a6"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-win32.whl", hash = "sha256:00e046b6dd71aa03a41079792f8473dc494d564611a8f89bbbd7cb93295ebdcf"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-win_amd64.whl", hash = "sha256:fa173ec60341d6bb97a89f5ea19c85c5643c1e7dedebc22f5181eb73573142c5"}, + {file = "MarkupSafe-2.1.5.tar.gz", hash = "sha256:d283d37a890ba4c1ae73ffadf8046435c76e7bc2247bbb63c00bd1a709c6544b"}, +] + +[[package]] +name = "packaging" +version = "23.2" +description = "Core utilities for Python packages" +optional = false +python-versions = ">=3.7" +files = [ + {file = "packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7"}, + {file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"}, +] + +[[package]] +name = "pendulum" +version = "2.1.2" +description = "Python datetimes made easy" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +files = [ + {file = "pendulum-2.1.2-cp27-cp27m-macosx_10_15_x86_64.whl", hash = "sha256:b6c352f4bd32dff1ea7066bd31ad0f71f8d8100b9ff709fb343f3b86cee43efe"}, + {file = "pendulum-2.1.2-cp27-cp27m-win_amd64.whl", hash = "sha256:318f72f62e8e23cd6660dbafe1e346950281a9aed144b5c596b2ddabc1d19739"}, + {file = "pendulum-2.1.2-cp35-cp35m-macosx_10_15_x86_64.whl", hash = "sha256:0731f0c661a3cb779d398803655494893c9f581f6488048b3fb629c2342b5394"}, + {file = "pendulum-2.1.2-cp35-cp35m-manylinux1_i686.whl", hash = 
"sha256:3481fad1dc3f6f6738bd575a951d3c15d4b4ce7c82dce37cf8ac1483fde6e8b0"}, + {file = "pendulum-2.1.2-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:9702069c694306297ed362ce7e3c1ef8404ac8ede39f9b28b7c1a7ad8c3959e3"}, + {file = "pendulum-2.1.2-cp35-cp35m-win_amd64.whl", hash = "sha256:fb53ffa0085002ddd43b6ca61a7b34f2d4d7c3ed66f931fe599e1a531b42af9b"}, + {file = "pendulum-2.1.2-cp36-cp36m-macosx_10_15_x86_64.whl", hash = "sha256:c501749fdd3d6f9e726086bf0cd4437281ed47e7bca132ddb522f86a1645d360"}, + {file = "pendulum-2.1.2-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:c807a578a532eeb226150d5006f156632df2cc8c5693d778324b43ff8c515dd0"}, + {file = "pendulum-2.1.2-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:2d1619a721df661e506eff8db8614016f0720ac171fe80dda1333ee44e684087"}, + {file = "pendulum-2.1.2-cp36-cp36m-win_amd64.whl", hash = "sha256:f888f2d2909a414680a29ae74d0592758f2b9fcdee3549887779cd4055e975db"}, + {file = "pendulum-2.1.2-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:e95d329384717c7bf627bf27e204bc3b15c8238fa8d9d9781d93712776c14002"}, + {file = "pendulum-2.1.2-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:4c9c689747f39d0d02a9f94fcee737b34a5773803a64a5fdb046ee9cac7442c5"}, + {file = "pendulum-2.1.2-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:1245cd0075a3c6d889f581f6325dd8404aca5884dea7223a5566c38aab94642b"}, + {file = "pendulum-2.1.2-cp37-cp37m-win_amd64.whl", hash = "sha256:db0a40d8bcd27b4fb46676e8eb3c732c67a5a5e6bfab8927028224fbced0b40b"}, + {file = "pendulum-2.1.2-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:f5e236e7730cab1644e1b87aca3d2ff3e375a608542e90fe25685dae46310116"}, + {file = "pendulum-2.1.2-cp38-cp38-manylinux1_i686.whl", hash = "sha256:de42ea3e2943171a9e95141f2eecf972480636e8e484ccffaf1e833929e9e052"}, + {file = "pendulum-2.1.2-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:7c5ec650cb4bec4c63a89a0242cc8c3cebcec92fcfe937c417ba18277d8560be"}, + {file = "pendulum-2.1.2-cp38-cp38-win_amd64.whl", hash = "sha256:33fb61601083f3eb1d15edeb45274f73c63b3c44a8524703dc143f4212bf3269"}, + {file = "pendulum-2.1.2-cp39-cp39-manylinux1_i686.whl", hash = "sha256:29c40a6f2942376185728c9a0347d7c0f07905638c83007e1d262781f1e6953a"}, + {file = "pendulum-2.1.2-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:94b1fc947bfe38579b28e1cccb36f7e28a15e841f30384b5ad6c5e31055c85d7"}, + {file = "pendulum-2.1.2.tar.gz", hash = "sha256:b06a0ca1bfe41c990bbf0c029f0b6501a7f2ec4e38bfec730712015e8860f207"}, +] + +[package.dependencies] +python-dateutil = ">=2.6,<3.0" +pytzdata = ">=2020.1" + +[[package]] +name = "platformdirs" +version = "4.2.0" +description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "platformdirs-4.2.0-py3-none-any.whl", hash = "sha256:0614df2a2f37e1a662acbd8e2b25b92ccf8632929bc6d43467e17fe89c75e068"}, + {file = "platformdirs-4.2.0.tar.gz", hash = "sha256:ef0cc731df711022c174543cb70a9b5bd22e5a9337c8624ef2c2ceb8ddad8768"}, +] + +[package.extras] +docs = ["furo (>=2023.9.10)", "proselint (>=0.13)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.25.2)"] +test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)"] + +[[package]] +name = "pluggy" +version = "1.4.0" +description = "plugin and hook calling mechanisms for python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pluggy-1.4.0-py3-none-any.whl", hash = "sha256:7db9f7b503d67d1c5b95f59773ebb58a8c1c288129a88665838012cfb07b8981"}, + {file = "pluggy-1.4.0.tar.gz", hash = "sha256:8c85c2876142a764e5b7548e7d9a0e0ddb46f5185161049a79b7e974454223be"}, +] + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + +[[package]] +name = "py" +version = "1.11.0" +description = "library with cross-python path, ini-parsing, io, code, log facilities" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +files = [ + {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"}, + {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"}, +] + +[[package]] +name = "pydantic" +version = "1.10.14" +description = "Data validation and settings management using python type hints" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pydantic-1.10.14-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7f4fcec873f90537c382840f330b90f4715eebc2bc9925f04cb92de593eae054"}, + {file = "pydantic-1.10.14-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8e3a76f571970fcd3c43ad982daf936ae39b3e90b8a2e96c04113a369869dc87"}, + {file = "pydantic-1.10.14-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:82d886bd3c3fbeaa963692ef6b643159ccb4b4cefaf7ff1617720cbead04fd1d"}, + {file = "pydantic-1.10.14-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:798a3d05ee3b71967844a1164fd5bdb8c22c6d674f26274e78b9f29d81770c4e"}, + {file = "pydantic-1.10.14-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:23d47a4b57a38e8652bcab15a658fdb13c785b9ce217cc3a729504ab4e1d6bc9"}, + {file = "pydantic-1.10.14-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f9f674b5c3bebc2eba401de64f29948ae1e646ba2735f884d1594c5f675d6f2a"}, + {file = "pydantic-1.10.14-cp310-cp310-win_amd64.whl", hash = "sha256:24a7679fab2e0eeedb5a8924fc4a694b3bcaac7d305aeeac72dd7d4e05ecbebf"}, + {file = "pydantic-1.10.14-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9d578ac4bf7fdf10ce14caba6f734c178379bd35c486c6deb6f49006e1ba78a7"}, + {file = "pydantic-1.10.14-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:fa7790e94c60f809c95602a26d906eba01a0abee9cc24150e4ce2189352deb1b"}, + {file = "pydantic-1.10.14-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aad4e10efa5474ed1a611b6d7f0d130f4aafadceb73c11d9e72823e8f508e663"}, + {file = "pydantic-1.10.14-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1245f4f61f467cb3dfeced2b119afef3db386aec3d24a22a1de08c65038b255f"}, + {file = 
"pydantic-1.10.14-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:21efacc678a11114c765eb52ec0db62edffa89e9a562a94cbf8fa10b5db5c046"}, + {file = "pydantic-1.10.14-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:412ab4a3f6dbd2bf18aefa9f79c7cca23744846b31f1d6555c2ee2b05a2e14ca"}, + {file = "pydantic-1.10.14-cp311-cp311-win_amd64.whl", hash = "sha256:e897c9f35281f7889873a3e6d6b69aa1447ceb024e8495a5f0d02ecd17742a7f"}, + {file = "pydantic-1.10.14-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:d604be0f0b44d473e54fdcb12302495fe0467c56509a2f80483476f3ba92b33c"}, + {file = "pydantic-1.10.14-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a42c7d17706911199798d4c464b352e640cab4351efe69c2267823d619a937e5"}, + {file = "pydantic-1.10.14-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:596f12a1085e38dbda5cbb874d0973303e34227b400b6414782bf205cc14940c"}, + {file = "pydantic-1.10.14-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:bfb113860e9288d0886e3b9e49d9cf4a9d48b441f52ded7d96db7819028514cc"}, + {file = "pydantic-1.10.14-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:bc3ed06ab13660b565eed80887fcfbc0070f0aa0691fbb351657041d3e874efe"}, + {file = "pydantic-1.10.14-cp37-cp37m-win_amd64.whl", hash = "sha256:ad8c2bc677ae5f6dbd3cf92f2c7dc613507eafe8f71719727cbc0a7dec9a8c01"}, + {file = "pydantic-1.10.14-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:c37c28449752bb1f47975d22ef2882d70513c546f8f37201e0fec3a97b816eee"}, + {file = "pydantic-1.10.14-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:49a46a0994dd551ec051986806122767cf144b9702e31d47f6d493c336462597"}, + {file = "pydantic-1.10.14-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:53e3819bd20a42470d6dd0fe7fc1c121c92247bca104ce608e609b59bc7a77ee"}, + {file = "pydantic-1.10.14-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0fbb503bbbbab0c588ed3cd21975a1d0d4163b87e360fec17a792f7d8c4ff29f"}, + {file = "pydantic-1.10.14-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:336709883c15c050b9c55a63d6c7ff09be883dbc17805d2b063395dd9d9d0022"}, + {file = "pydantic-1.10.14-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:4ae57b4d8e3312d486e2498d42aed3ece7b51848336964e43abbf9671584e67f"}, + {file = "pydantic-1.10.14-cp38-cp38-win_amd64.whl", hash = "sha256:dba49d52500c35cfec0b28aa8b3ea5c37c9df183ffc7210b10ff2a415c125c4a"}, + {file = "pydantic-1.10.14-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c66609e138c31cba607d8e2a7b6a5dc38979a06c900815495b2d90ce6ded35b4"}, + {file = "pydantic-1.10.14-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d986e115e0b39604b9eee3507987368ff8148222da213cd38c359f6f57b3b347"}, + {file = "pydantic-1.10.14-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:646b2b12df4295b4c3148850c85bff29ef6d0d9621a8d091e98094871a62e5c7"}, + {file = "pydantic-1.10.14-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:282613a5969c47c83a8710cc8bfd1e70c9223feb76566f74683af889faadc0ea"}, + {file = "pydantic-1.10.14-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:466669501d08ad8eb3c4fecd991c5e793c4e0bbd62299d05111d4f827cded64f"}, + {file = "pydantic-1.10.14-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:13e86a19dca96373dcf3190fcb8797d40a6f12f154a244a8d1e8e03b8f280593"}, + {file = "pydantic-1.10.14-cp39-cp39-win_amd64.whl", hash = 
"sha256:08b6ec0917c30861e3fe71a93be1648a2aa4f62f866142ba21670b24444d7fd8"}, + {file = "pydantic-1.10.14-py3-none-any.whl", hash = "sha256:8ee853cd12ac2ddbf0ecbac1c289f95882b2d4482258048079d13be700aa114c"}, + {file = "pydantic-1.10.14.tar.gz", hash = "sha256:46f17b832fe27de7850896f3afee50ea682220dd218f7e9c88d436788419dca6"}, +] + +[package.dependencies] +typing-extensions = ">=4.2.0" + +[package.extras] +dotenv = ["python-dotenv (>=0.10.4)"] +email = ["email-validator (>=1.0.3)"] + +[[package]] +name = "pyrate-limiter" +version = "3.1.1" +description = "Python Rate-Limiter using Leaky-Bucket Algorithm" +optional = false +python-versions = ">=3.8,<4.0" +files = [ + {file = "pyrate_limiter-3.1.1-py3-none-any.whl", hash = "sha256:c51906f1d51d56dc992ff6c26e8300e32151bc6cfa3e6559792e31971dfd4e2b"}, + {file = "pyrate_limiter-3.1.1.tar.gz", hash = "sha256:2f57eda712687e6eccddf6afe8f8a15b409b97ed675fe64a626058f12863b7b7"}, +] + +[package.extras] +all = ["filelock (>=3.0)", "redis (>=5.0.0,<6.0.0)"] +docs = ["furo (>=2022.3.4,<2023.0.0)", "myst-parser (>=0.17)", "sphinx (>=4.3.0,<5.0.0)", "sphinx-autodoc-typehints (>=1.17,<2.0)", "sphinx-copybutton (>=0.5)", "sphinxcontrib-apidoc (>=0.3,<0.4)"] + +[[package]] +name = "pyrsistent" +version = "0.20.0" +description = "Persistent/Functional/Immutable data structures" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pyrsistent-0.20.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:8c3aba3e01235221e5b229a6c05f585f344734bd1ad42a8ac51493d74722bbce"}, + {file = "pyrsistent-0.20.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c1beb78af5423b879edaf23c5591ff292cf7c33979734c99aa66d5914ead880f"}, + {file = "pyrsistent-0.20.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21cc459636983764e692b9eba7144cdd54fdec23ccdb1e8ba392a63666c60c34"}, + {file = "pyrsistent-0.20.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f5ac696f02b3fc01a710427585c855f65cd9c640e14f52abe52020722bb4906b"}, + {file = "pyrsistent-0.20.0-cp310-cp310-win32.whl", hash = "sha256:0724c506cd8b63c69c7f883cc233aac948c1ea946ea95996ad8b1380c25e1d3f"}, + {file = "pyrsistent-0.20.0-cp310-cp310-win_amd64.whl", hash = "sha256:8441cf9616d642c475684d6cf2520dd24812e996ba9af15e606df5f6fd9d04a7"}, + {file = "pyrsistent-0.20.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:0f3b1bcaa1f0629c978b355a7c37acd58907390149b7311b5db1b37648eb6958"}, + {file = "pyrsistent-0.20.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5cdd7ef1ea7a491ae70d826b6cc64868de09a1d5ff9ef8d574250d0940e275b8"}, + {file = "pyrsistent-0.20.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cae40a9e3ce178415040a0383f00e8d68b569e97f31928a3a8ad37e3fde6df6a"}, + {file = "pyrsistent-0.20.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6288b3fa6622ad8a91e6eb759cfc48ff3089e7c17fb1d4c59a919769314af224"}, + {file = "pyrsistent-0.20.0-cp311-cp311-win32.whl", hash = "sha256:7d29c23bdf6e5438c755b941cef867ec2a4a172ceb9f50553b6ed70d50dfd656"}, + {file = "pyrsistent-0.20.0-cp311-cp311-win_amd64.whl", hash = "sha256:59a89bccd615551391f3237e00006a26bcf98a4d18623a19909a2c48b8e986ee"}, + {file = "pyrsistent-0.20.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:09848306523a3aba463c4b49493a760e7a6ca52e4826aa100ee99d8d39b7ad1e"}, + {file = 
"pyrsistent-0.20.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a14798c3005ec892bbada26485c2eea3b54109cb2533713e355c806891f63c5e"}, + {file = "pyrsistent-0.20.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b14decb628fac50db5e02ee5a35a9c0772d20277824cfe845c8a8b717c15daa3"}, + {file = "pyrsistent-0.20.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2e2c116cc804d9b09ce9814d17df5edf1df0c624aba3b43bc1ad90411487036d"}, + {file = "pyrsistent-0.20.0-cp312-cp312-win32.whl", hash = "sha256:e78d0c7c1e99a4a45c99143900ea0546025e41bb59ebc10182e947cf1ece9174"}, + {file = "pyrsistent-0.20.0-cp312-cp312-win_amd64.whl", hash = "sha256:4021a7f963d88ccd15b523787d18ed5e5269ce57aa4037146a2377ff607ae87d"}, + {file = "pyrsistent-0.20.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:79ed12ba79935adaac1664fd7e0e585a22caa539dfc9b7c7c6d5ebf91fb89054"}, + {file = "pyrsistent-0.20.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f920385a11207dc372a028b3f1e1038bb244b3ec38d448e6d8e43c6b3ba20e98"}, + {file = "pyrsistent-0.20.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f5c2d012671b7391803263419e31b5c7c21e7c95c8760d7fc35602353dee714"}, + {file = "pyrsistent-0.20.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ef3992833fbd686ee783590639f4b8343a57f1f75de8633749d984dc0eb16c86"}, + {file = "pyrsistent-0.20.0-cp38-cp38-win32.whl", hash = "sha256:881bbea27bbd32d37eb24dd320a5e745a2a5b092a17f6debc1349252fac85423"}, + {file = "pyrsistent-0.20.0-cp38-cp38-win_amd64.whl", hash = "sha256:6d270ec9dd33cdb13f4d62c95c1a5a50e6b7cdd86302b494217137f760495b9d"}, + {file = "pyrsistent-0.20.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:ca52d1ceae015859d16aded12584c59eb3825f7b50c6cfd621d4231a6cc624ce"}, + {file = "pyrsistent-0.20.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b318ca24db0f0518630e8b6f3831e9cba78f099ed5c1d65ffe3e023003043ba0"}, + {file = "pyrsistent-0.20.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fed2c3216a605dc9a6ea50c7e84c82906e3684c4e80d2908208f662a6cbf9022"}, + {file = "pyrsistent-0.20.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2e14c95c16211d166f59c6611533d0dacce2e25de0f76e4c140fde250997b3ca"}, + {file = "pyrsistent-0.20.0-cp39-cp39-win32.whl", hash = "sha256:f058a615031eea4ef94ead6456f5ec2026c19fb5bd6bfe86e9665c4158cf802f"}, + {file = "pyrsistent-0.20.0-cp39-cp39-win_amd64.whl", hash = "sha256:58b8f6366e152092194ae68fefe18b9f0b4f89227dfd86a07770c3d86097aebf"}, + {file = "pyrsistent-0.20.0-py3-none-any.whl", hash = "sha256:c55acc4733aad6560a7f5f818466631f07efc001fd023f34a6c203f8b6df0f0b"}, + {file = "pyrsistent-0.20.0.tar.gz", hash = "sha256:4c48f78f62ab596c679086084d0dd13254ae4f3d6c72a83ffdf5ebdef8f265a4"}, +] + +[[package]] +name = "pytest" +version = "6.2.5" +description = "pytest: simple powerful testing with Python" +optional = false +python-versions = ">=3.6" +files = [ + {file = "pytest-6.2.5-py3-none-any.whl", hash = "sha256:7310f8d27bc79ced999e760ca304d69f6ba6c6649c0b60fb0e04a4a77cacc134"}, + {file = "pytest-6.2.5.tar.gz", hash = "sha256:131b36680866a76e6781d13f101efb86cf674ebb9762eb70d3082b6f29889e89"}, +] + +[package.dependencies] +atomicwrites = {version = ">=1.0", markers = "sys_platform == \"win32\""} +attrs = ">=19.2.0" +colorama 
= {version = "*", markers = "sys_platform == \"win32\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=0.12,<2.0" +py = ">=1.8.2" +toml = "*" + +[package.extras] +testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "requests", "xmlschema"] + +[[package]] +name = "pytest-mock" +version = "3.12.0" +description = "Thin-wrapper around the mock package for easier use with pytest" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pytest-mock-3.12.0.tar.gz", hash = "sha256:31a40f038c22cad32287bb43932054451ff5583ff094bca6f675df2f8bc1a6e9"}, + {file = "pytest_mock-3.12.0-py3-none-any.whl", hash = "sha256:0972719a7263072da3a21c7f4773069bcc7486027d7e8e1f81d98a47e701bc4f"}, +] + +[package.dependencies] +pytest = ">=5.0" + +[package.extras] +dev = ["pre-commit", "pytest-asyncio", "tox"] + +[[package]] +name = "python-dateutil" +version = "2.8.2" +description = "Extensions to the standard Python datetime module" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +files = [ + {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, + {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, +] + +[package.dependencies] +six = ">=1.5" + +[[package]] +name = "pytzdata" +version = "2020.1" +description = "The Olson timezone database for Python." +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "pytzdata-2020.1-py2.py3-none-any.whl", hash = "sha256:e1e14750bcf95016381e4d472bad004eef710f2d6417240904070b3d6654485f"}, + {file = "pytzdata-2020.1.tar.gz", hash = "sha256:3efa13b335a00a8de1d345ae41ec78dd11c9f8807f522d39850f2dd828681540"}, +] + +[[package]] +name = "pyyaml" +version = "6.0.1" +description = "YAML parser and emitter for Python" +optional = false +python-versions = ">=3.6" +files = [ + {file = "PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"}, + {file = "PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, + {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, + {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, + {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, + {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, + {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, + {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, + {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, + {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, + {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, + {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win32.whl", hash = "sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa"}, + {file = "PyYAML-6.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baa90d3f661d43131ca170712d903e6295d1f7a0f595074f151c0aed377c9b9c"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win32.whl", hash = "sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba"}, + {file = 
"PyYAML-6.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867"}, + {file = "PyYAML-6.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1d4c7e777c441b20e32f52bd377e0c409713e8bb1386e1099c2415f26e479595"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, + {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, + {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, + {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, + {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, + {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, + {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, + {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, +] + +[[package]] +name = "requests" +version = "2.31.0" +description = "Python HTTP for Humans." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f"}, + {file = "requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"}, +] + +[package.dependencies] +certifi = ">=2017.4.17" +charset-normalizer = ">=2,<4" +idna = ">=2.5,<4" +urllib3 = ">=1.21.1,<3" + +[package.extras] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] + +[[package]] +name = "requests-cache" +version = "1.2.0" +description = "A persistent cache for python requests" +optional = false +python-versions = ">=3.8" +files = [ + {file = "requests_cache-1.2.0-py3-none-any.whl", hash = "sha256:490324301bf0cb924ff4e6324bd2613453e7e1f847353928b08adb0fdfb7f722"}, + {file = "requests_cache-1.2.0.tar.gz", hash = "sha256:db1c709ca343cc1cd5b6c8b1a5387298eceed02306a6040760db538c885e3838"}, +] + +[package.dependencies] +attrs = ">=21.2" +cattrs = ">=22.2" +platformdirs = ">=2.5" +requests = ">=2.22" +url-normalize = ">=1.4" +urllib3 = ">=1.25.5" + +[package.extras] +all = ["boto3 (>=1.15)", "botocore (>=1.18)", "itsdangerous (>=2.0)", "pymongo (>=3)", "pyyaml (>=6.0.1)", "redis (>=3)", "ujson (>=5.4)"] +bson = ["bson (>=0.5)"] +docs = ["furo (>=2023.3,<2024.0)", "linkify-it-py (>=2.0,<3.0)", "myst-parser (>=1.0,<2.0)", "sphinx (>=5.0.2,<6.0.0)", "sphinx-autodoc-typehints (>=1.19)", "sphinx-automodapi (>=0.14)", "sphinx-copybutton (>=0.5)", "sphinx-design (>=0.2)", "sphinx-notfound-page (>=0.8)", "sphinxcontrib-apidoc (>=0.3)", "sphinxext-opengraph (>=0.9)"] +dynamodb = ["boto3 (>=1.15)", "botocore (>=1.18)"] +json = ["ujson (>=5.4)"] +mongodb = ["pymongo (>=3)"] +redis = ["redis (>=3)"] +security = ["itsdangerous (>=2.0)"] +yaml = ["pyyaml (>=6.0.1)"] + +[[package]] +name = "requests-mock" +version = "1.11.0" +description = "Mock out responses from the requests package" +optional = false +python-versions = "*" +files = [ + {file = "requests-mock-1.11.0.tar.gz", hash = "sha256:ef10b572b489a5f28e09b708697208c4a3b2b89ef80a9f01584340ea357ec3c4"}, + {file = "requests_mock-1.11.0-py2.py3-none-any.whl", hash = "sha256:f7fae383f228633f6bececebdab236c478ace2284d6292c6e7e2867b9ab74d15"}, +] + +[package.dependencies] +requests = ">=2.3,<3" +six = "*" + +[package.extras] +fixture = ["fixtures"] +test = ["fixtures", "mock", "purl", "pytest", "requests-futures", "sphinx", "testtools"] + +[[package]] +name = "responses" +version = "0.23.3" +description = "A utility library for mocking out the `requests` Python library." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "responses-0.23.3-py3-none-any.whl", hash = "sha256:e6fbcf5d82172fecc0aa1860fd91e58cbfd96cee5e96da5b63fa6eb3caa10dd3"}, + {file = "responses-0.23.3.tar.gz", hash = "sha256:205029e1cb334c21cb4ec64fc7599be48b859a0fd381a42443cdd600bfe8b16a"}, +] + +[package.dependencies] +pyyaml = "*" +requests = ">=2.30.0,<3.0" +types-PyYAML = "*" +urllib3 = ">=1.25.10,<3.0" + +[package.extras] +tests = ["coverage (>=6.0.0)", "flake8", "mypy", "pytest (>=7.0.0)", "pytest-asyncio", "pytest-cov", "pytest-httpserver", "tomli", "tomli-w", "types-requests"] + +[[package]] +name = "setuptools" +version = "69.1.0" +description = "Easily download, build, install, upgrade, and uninstall Python packages" +optional = false +python-versions = ">=3.8" +files = [ + {file = "setuptools-69.1.0-py3-none-any.whl", hash = "sha256:c054629b81b946d63a9c6e732bc8b2513a7c3ea645f11d0139a2191d735c60c6"}, + {file = "setuptools-69.1.0.tar.gz", hash = "sha256:850894c4195f09c4ed30dba56213bf7c3f21d86ed6bdaafb5df5972593bfc401"}, +] + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"] +testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pip (>=19.1)", "pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-ruff (>=0.2.1)", "pytest-timeout", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] +testing-integration = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "packaging (>=23.1)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"] + +[[package]] +name = "sgqlc" +version = "16.3" +description = "Simple GraphQL Client" +optional = false +python-versions = ">=3.7,<4.0" +files = [ + {file = "sgqlc-16.3-py3-none-any.whl", hash = "sha256:89d468386a4ba4b5ade991623228b6fb0a25bea1f25643ccac130fb3ef565b72"}, + {file = "sgqlc-16.3.tar.gz", hash = "sha256:be08857775aa3e65ef7b2c1f0cdcc65dd5794907b162b393c189187fee664558"}, +] + +[package.dependencies] +graphql-core = ">=3.1.7,<4.0.0" + +[package.extras] +requests = ["requests"] +websocket = ["websocket-client"] + +[[package]] +name = "six" +version = "1.16.0" +description = "Python 2 and 3 compatibility utilities" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ + {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, + {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, +] + +[[package]] +name = "toml" +version = "0.10.2" +description = "Python Library for Tom's Obvious, Minimal Language" +optional = false +python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ + {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"}, + {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"}, +] + +[[package]] +name = "types-pyyaml" +version = "6.0.12.12" +description = 
"Typing stubs for PyYAML" +optional = false +python-versions = "*" +files = [ + {file = "types-PyYAML-6.0.12.12.tar.gz", hash = "sha256:334373d392fde0fdf95af5c3f1661885fa10c52167b14593eb856289e1855062"}, + {file = "types_PyYAML-6.0.12.12-py3-none-any.whl", hash = "sha256:c05bc6c158facb0676674b7f11fe3960db4f389718e19e62bd2b84d6205cfd24"}, +] + +[[package]] +name = "typing-extensions" +version = "4.9.0" +description = "Backported and Experimental Type Hints for Python 3.8+" +optional = false +python-versions = ">=3.8" +files = [ + {file = "typing_extensions-4.9.0-py3-none-any.whl", hash = "sha256:af72aea155e91adfc61c3ae9e0e342dbc0cba726d6cba4b6c72c1f34e47291cd"}, + {file = "typing_extensions-4.9.0.tar.gz", hash = "sha256:23478f88c37f27d76ac8aee6c905017a143b0b1b886c3c9f66bc2fd94f9f5783"}, +] + +[[package]] +name = "url-normalize" +version = "1.4.3" +description = "URL normalization for Python" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" +files = [ + {file = "url-normalize-1.4.3.tar.gz", hash = "sha256:d23d3a070ac52a67b83a1c59a0e68f8608d1cd538783b401bc9de2c0fac999b2"}, + {file = "url_normalize-1.4.3-py2.py3-none-any.whl", hash = "sha256:ec3c301f04e5bb676d333a7fa162fa977ad2ca04b7e652bfc9fac4e405728eed"}, +] + +[package.dependencies] +six = "*" + +[[package]] +name = "urllib3" +version = "2.2.1" +description = "HTTP library with thread-safe connection pooling, file post, and more." +optional = false +python-versions = ">=3.8" +files = [ + {file = "urllib3-2.2.1-py3-none-any.whl", hash = "sha256:450b20ec296a467077128bff42b73080516e71b56ff59a60a02bef2232c4fa9d"}, + {file = "urllib3-2.2.1.tar.gz", hash = "sha256:d0570876c61ab9e520d776c38acbbb5b05a776d3f9ff98a5c8fd5162a444cf19"}, +] + +[package.extras] +brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] +h2 = ["h2 (>=4,<5)"] +socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] +zstd = ["zstandard (>=0.18.0)"] + +[[package]] +name = "wcmatch" +version = "8.4" +description = "Wildcard/glob file name matcher." +optional = false +python-versions = ">=3.7" +files = [ + {file = "wcmatch-8.4-py3-none-any.whl", hash = "sha256:dc7351e5a7f8bbf4c6828d51ad20c1770113f5f3fd3dfe2a03cfde2a63f03f98"}, + {file = "wcmatch-8.4.tar.gz", hash = "sha256:ba4fc5558f8946bf1ffc7034b05b814d825d694112499c86035e0e4d398b6a67"}, +] + +[package.dependencies] +bracex = ">=2.1.1" + +[[package]] +name = "wrapt" +version = "1.16.0" +description = "Module for decorators, wrappers and monkey patching." 
+optional = false +python-versions = ">=3.6" +files = [ + {file = "wrapt-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ffa565331890b90056c01db69c0fe634a776f8019c143a5ae265f9c6bc4bd6d4"}, + {file = "wrapt-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e4fdb9275308292e880dcbeb12546df7f3e0f96c6b41197e0cf37d2826359020"}, + {file = "wrapt-1.16.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb2dee3874a500de01c93d5c71415fcaef1d858370d405824783e7a8ef5db440"}, + {file = "wrapt-1.16.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2a88e6010048489cda82b1326889ec075a8c856c2e6a256072b28eaee3ccf487"}, + {file = "wrapt-1.16.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac83a914ebaf589b69f7d0a1277602ff494e21f4c2f743313414378f8f50a4cf"}, + {file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:73aa7d98215d39b8455f103de64391cb79dfcad601701a3aa0dddacf74911d72"}, + {file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:807cc8543a477ab7422f1120a217054f958a66ef7314f76dd9e77d3f02cdccd0"}, + {file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bf5703fdeb350e36885f2875d853ce13172ae281c56e509f4e6eca049bdfb136"}, + {file = "wrapt-1.16.0-cp310-cp310-win32.whl", hash = "sha256:f6b2d0c6703c988d334f297aa5df18c45e97b0af3679bb75059e0e0bd8b1069d"}, + {file = "wrapt-1.16.0-cp310-cp310-win_amd64.whl", hash = "sha256:decbfa2f618fa8ed81c95ee18a387ff973143c656ef800c9f24fb7e9c16054e2"}, + {file = "wrapt-1.16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1a5db485fe2de4403f13fafdc231b0dbae5eca4359232d2efc79025527375b09"}, + {file = "wrapt-1.16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:75ea7d0ee2a15733684badb16de6794894ed9c55aa5e9903260922f0482e687d"}, + {file = "wrapt-1.16.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a452f9ca3e3267cd4d0fcf2edd0d035b1934ac2bd7e0e57ac91ad6b95c0c6389"}, + {file = "wrapt-1.16.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:43aa59eadec7890d9958748db829df269f0368521ba6dc68cc172d5d03ed8060"}, + {file = "wrapt-1.16.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:72554a23c78a8e7aa02abbd699d129eead8b147a23c56e08d08dfc29cfdddca1"}, + {file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d2efee35b4b0a347e0d99d28e884dfd82797852d62fcd7ebdeee26f3ceb72cf3"}, + {file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:6dcfcffe73710be01d90cae08c3e548d90932d37b39ef83969ae135d36ef3956"}, + {file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:eb6e651000a19c96f452c85132811d25e9264d836951022d6e81df2fff38337d"}, + {file = "wrapt-1.16.0-cp311-cp311-win32.whl", hash = "sha256:66027d667efe95cc4fa945af59f92c5a02c6f5bb6012bff9e60542c74c75c362"}, + {file = "wrapt-1.16.0-cp311-cp311-win_amd64.whl", hash = "sha256:aefbc4cb0a54f91af643660a0a150ce2c090d3652cf4052a5397fb2de549cd89"}, + {file = "wrapt-1.16.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5eb404d89131ec9b4f748fa5cfb5346802e5ee8836f57d516576e61f304f3b7b"}, + {file = "wrapt-1.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9090c9e676d5236a6948330e83cb89969f433b1943a558968f659ead07cb3b36"}, + {file = 
"wrapt-1.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94265b00870aa407bd0cbcfd536f17ecde43b94fb8d228560a1e9d3041462d73"}, + {file = "wrapt-1.16.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f2058f813d4f2b5e3a9eb2eb3faf8f1d99b81c3e51aeda4b168406443e8ba809"}, + {file = "wrapt-1.16.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:98b5e1f498a8ca1858a1cdbffb023bfd954da4e3fa2c0cb5853d40014557248b"}, + {file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:14d7dc606219cdd7405133c713f2c218d4252f2a469003f8c46bb92d5d095d81"}, + {file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:49aac49dc4782cb04f58986e81ea0b4768e4ff197b57324dcbd7699c5dfb40b9"}, + {file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:418abb18146475c310d7a6dc71143d6f7adec5b004ac9ce08dc7a34e2babdc5c"}, + {file = "wrapt-1.16.0-cp312-cp312-win32.whl", hash = "sha256:685f568fa5e627e93f3b52fda002c7ed2fa1800b50ce51f6ed1d572d8ab3e7fc"}, + {file = "wrapt-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:dcdba5c86e368442528f7060039eda390cc4091bfd1dca41e8046af7c910dda8"}, + {file = "wrapt-1.16.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:d462f28826f4657968ae51d2181a074dfe03c200d6131690b7d65d55b0f360f8"}, + {file = "wrapt-1.16.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a33a747400b94b6d6b8a165e4480264a64a78c8a4c734b62136062e9a248dd39"}, + {file = "wrapt-1.16.0-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b3646eefa23daeba62643a58aac816945cadc0afaf21800a1421eeba5f6cfb9c"}, + {file = "wrapt-1.16.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ebf019be5c09d400cf7b024aa52b1f3aeebeff51550d007e92c3c1c4afc2a40"}, + {file = "wrapt-1.16.0-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:0d2691979e93d06a95a26257adb7bfd0c93818e89b1406f5a28f36e0d8c1e1fc"}, + {file = "wrapt-1.16.0-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:1acd723ee2a8826f3d53910255643e33673e1d11db84ce5880675954183ec47e"}, + {file = "wrapt-1.16.0-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:bc57efac2da352a51cc4658878a68d2b1b67dbe9d33c36cb826ca449d80a8465"}, + {file = "wrapt-1.16.0-cp36-cp36m-win32.whl", hash = "sha256:da4813f751142436b075ed7aa012a8778aa43a99f7b36afe9b742d3ed8bdc95e"}, + {file = "wrapt-1.16.0-cp36-cp36m-win_amd64.whl", hash = "sha256:6f6eac2360f2d543cc875a0e5efd413b6cbd483cb3ad7ebf888884a6e0d2e966"}, + {file = "wrapt-1.16.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:a0ea261ce52b5952bf669684a251a66df239ec6d441ccb59ec7afa882265d593"}, + {file = "wrapt-1.16.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7bd2d7ff69a2cac767fbf7a2b206add2e9a210e57947dd7ce03e25d03d2de292"}, + {file = "wrapt-1.16.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9159485323798c8dc530a224bd3ffcf76659319ccc7bbd52e01e73bd0241a0c5"}, + {file = "wrapt-1.16.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a86373cf37cd7764f2201b76496aba58a52e76dedfaa698ef9e9688bfd9e41cf"}, + {file = "wrapt-1.16.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:73870c364c11f03ed072dda68ff7aea6d2a3a5c3fe250d917a429c7432e15228"}, + {file = 
"wrapt-1.16.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:b935ae30c6e7400022b50f8d359c03ed233d45b725cfdd299462f41ee5ffba6f"}, + {file = "wrapt-1.16.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:db98ad84a55eb09b3c32a96c576476777e87c520a34e2519d3e59c44710c002c"}, + {file = "wrapt-1.16.0-cp37-cp37m-win32.whl", hash = "sha256:9153ed35fc5e4fa3b2fe97bddaa7cbec0ed22412b85bcdaf54aeba92ea37428c"}, + {file = "wrapt-1.16.0-cp37-cp37m-win_amd64.whl", hash = "sha256:66dfbaa7cfa3eb707bbfcd46dab2bc6207b005cbc9caa2199bcbc81d95071a00"}, + {file = "wrapt-1.16.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1dd50a2696ff89f57bd8847647a1c363b687d3d796dc30d4dd4a9d1689a706f0"}, + {file = "wrapt-1.16.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:44a2754372e32ab315734c6c73b24351d06e77ffff6ae27d2ecf14cf3d229202"}, + {file = "wrapt-1.16.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e9723528b9f787dc59168369e42ae1c3b0d3fadb2f1a71de14531d321ee05b0"}, + {file = "wrapt-1.16.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dbed418ba5c3dce92619656802cc5355cb679e58d0d89b50f116e4a9d5a9603e"}, + {file = "wrapt-1.16.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:941988b89b4fd6b41c3f0bfb20e92bd23746579736b7343283297c4c8cbae68f"}, + {file = "wrapt-1.16.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6a42cd0cfa8ffc1915aef79cb4284f6383d8a3e9dcca70c445dcfdd639d51267"}, + {file = "wrapt-1.16.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:1ca9b6085e4f866bd584fb135a041bfc32cab916e69f714a7d1d397f8c4891ca"}, + {file = "wrapt-1.16.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d5e49454f19ef621089e204f862388d29e6e8d8b162efce05208913dde5b9ad6"}, + {file = "wrapt-1.16.0-cp38-cp38-win32.whl", hash = "sha256:c31f72b1b6624c9d863fc095da460802f43a7c6868c5dda140f51da24fd47d7b"}, + {file = "wrapt-1.16.0-cp38-cp38-win_amd64.whl", hash = "sha256:490b0ee15c1a55be9c1bd8609b8cecd60e325f0575fc98f50058eae366e01f41"}, + {file = "wrapt-1.16.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9b201ae332c3637a42f02d1045e1d0cccfdc41f1f2f801dafbaa7e9b4797bfc2"}, + {file = "wrapt-1.16.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2076fad65c6736184e77d7d4729b63a6d1ae0b70da4868adeec40989858eb3fb"}, + {file = "wrapt-1.16.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5cd603b575ebceca7da5a3a251e69561bec509e0b46e4993e1cac402b7247b8"}, + {file = "wrapt-1.16.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b47cfad9e9bbbed2339081f4e346c93ecd7ab504299403320bf85f7f85c7d46c"}, + {file = "wrapt-1.16.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8212564d49c50eb4565e502814f694e240c55551a5f1bc841d4fcaabb0a9b8a"}, + {file = "wrapt-1.16.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:5f15814a33e42b04e3de432e573aa557f9f0f56458745c2074952f564c50e664"}, + {file = "wrapt-1.16.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:db2e408d983b0e61e238cf579c09ef7020560441906ca990fe8412153e3b291f"}, + {file = "wrapt-1.16.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:edfad1d29c73f9b863ebe7082ae9321374ccb10879eeabc84ba3b69f2579d537"}, + {file = "wrapt-1.16.0-cp39-cp39-win32.whl", hash = "sha256:ed867c42c268f876097248e05b6117a65bcd1e63b779e916fe2e33cd6fd0d3c3"}, + {file = "wrapt-1.16.0-cp39-cp39-win_amd64.whl", 
hash = "sha256:eb1b046be06b0fce7249f1d025cd359b4b80fc1c3e24ad9eca33e0dcdb2e4a35"}, + {file = "wrapt-1.16.0-py3-none-any.whl", hash = "sha256:6906c4100a8fcbf2fa735f6059214bb13b97f75b1a61777fcf6432121ef12ef1"}, + {file = "wrapt-1.16.0.tar.gz", hash = "sha256:5f370f952971e7d17c7d1ead40e49f32345a7f7a5373571ef44d800d06b1899d"}, +] + +[metadata] +lock-version = "2.0" +python-versions = "^3.9,<3.12" +content-hash = "40cc246c45e6c2d626e016673f3aa60794f3464d82c8ccd0b62a6b66df2b30da" diff --git a/airbyte-integrations/connectors/destination-kvdb/pyproject.toml b/airbyte-integrations/connectors/destination-kvdb/pyproject.toml new file mode 100644 index 0000000..2f61ed2 --- /dev/null +++ b/airbyte-integrations/connectors/destination-kvdb/pyproject.toml @@ -0,0 +1,31 @@ +[build-system] +requires = [ "poetry-core>=1.0.0",] +build-backend = "poetry.core.masonry.api" + +[tool.poetry] +version = "0.1.3" +name = "destination-kvdb" +description = "Destination implementation for kvdb." +authors = [ "Airbyte ",] +license = "MIT" +readme = "README.md" +documentation = "https://docs.airbyte.com/integrations/destinations/kvdb" +homepage = "https://airbyte.com" +repository = "https://github.com/airbytehq/airbyte" +[[tool.poetry.packages]] +include = "destination_kvdb" + +[tool.poetry.dependencies] +python = "^3.9,<3.12" +airbyte-cdk = "^0.62.1" +sgqlc = "==16.3" + +[tool.poetry.scripts] +destination-kvdb = "destination_kvdb.run:run" + +[tool.poetry.group.dev.dependencies] +requests-mock = "^1.9.3" +freezegun = "^1.2" +pytest-mock = "^3.6.1" +pytest = "^6.2" +responses = "^0.23.1" diff --git a/airbyte-integrations/connectors/destination-mariadb-columnstore/README.md b/airbyte-integrations/connectors/destination-mariadb-columnstore/README.md new file mode 100644 index 0000000..1c1793e --- /dev/null +++ b/airbyte-integrations/connectors/destination-mariadb-columnstore/README.md @@ -0,0 +1,72 @@ +# Destination Mariadb ColumnStore + +This is the repository for the Mariadb ColumnStore destination connector in Java. +For information about how to use this connector within Airbyte, see [the User Documentation](https://docs.airbyte.io/integrations/destinations/mariadb-columnstore). + +## Local development + +#### Building via Gradle +From the Airbyte repository root, run: +``` +./gradlew :airbyte-integrations:connectors:destination-mariadb-columnstore:build +``` + +#### Create credentials +**If you are a community contributor**, generate the necessary credentials and place them in `secrets/config.json` conforming to the spec file in `src/main/resources/spec.json`. +Note that the `secrets` directory is git-ignored by default, so there is no danger of accidentally checking in sensitive information. + +**If you are an Airbyte core member**, follow the [instructions](https://docs.airbyte.io/connector-development#using-credentials-in-ci) to set up the credentials. + +### Locally running the connector docker image + +#### Build +Build the connector image via Gradle: + +``` +./gradlew :airbyte-integrations:connectors:destination-mariadb-columnstore:buildConnectorImage +``` +Once built, the docker image name and tag on your host will be `airbyte/destination-mariadb-columnstore:dev`. +the Dockerfile. 
+ +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/destination-mariadb-columnstore:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-mariadb-columnstore:dev check --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-mariadb-columnstore:dev discover --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/destination-mariadb-columnstore:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json +``` + +## Testing +We use `JUnit` for Java tests. + +### Unit and Integration Tests +Place unit tests under `src/test/io/airbyte/integrations/destinations/mariadb_columnstore`. + +#### Acceptance Tests +Airbyte has a standard test suite that all destination connectors must pass. Implement the `TODO`s in +`src/test-integration/java/io/airbyte/integrations/destinations/MariadbColumnstoreDestinationAcceptanceTest.java`. + +### Using gradle to run tests +All commands should be run from the Airbyte project root. +To run unit tests: +``` +./gradlew :airbyte-integrations:connectors:destination-mariadb-columnstore:unitTest +``` +To run acceptance and custom integration tests: +``` +./gradlew :airbyte-integrations:connectors:destination-mariadb-columnstore:integrationTest +``` + +## Dependency Management + +### Publishing a new version of the connector +You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what? +1. Make sure your changes are passing our test suite: `airbyte-ci connectors --name=destination-mariadb-columnstore test` +2. Bump the connector version in `metadata.yaml`: increment the `dockerImageTag` value. Please follow [semantic versioning for connectors](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#semantic-versioning-for-connectors). +3. Make sure the `metadata.yaml` content is up to date. +4. Make sure the connector documentation and its changelog are up to date (`docs/integrations/destinations/mariadb-columnstore.md`). +5. Create a Pull Request: use [our PR naming conventions](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#pull-request-title-convention). +6. Pat yourself on the back for being an awesome contributor. +7. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master. + diff --git a/airbyte-integrations/connectors/destination-mariadb-columnstore/bootstrap.md b/airbyte-integrations/connectors/destination-mariadb-columnstore/bootstrap.md new file mode 100644 index 0000000..12fe901 --- /dev/null +++ b/airbyte-integrations/connectors/destination-mariadb-columnstore/bootstrap.md @@ -0,0 +1,24 @@ +# MariaDB ColumnStore + +## Overview + +MariaDB ColumnStore is a columnar storage engine that utilizes a massively parallel distributed data architecture. +From MariaDB 10.5.4, it is available as a storage engine for MariaDB Server. + +## Endpoints + + This destination connector uses the official MariaDB JDBC driver, [MariaDB Connector/J](https://mariadb.com/docs/clients/mariadb-connectors/connector-j/). + + ## Quick Notes + + - SSH Tunnel supported. + - TLS connection not supported yet. + - When creating a ColumnStore table, we have to specify the storage engine: `CREATE TABLE ... (...)
ENGINE=ColumnStore;` + - Normalization not supported yet for the following reasons: + - The [dbt-mysql](https://github.com/dbeatty10/dbt-mysql#dbt-mysql) adapter doesn't officially support MariaDB. + - When using [dbt-mysql](https://github.com/dbeatty10/dbt-mysql#dbt-mysql), we cannot specify the storage engine. For that reason, tables are created with the system's default storage engine (likely InnoDB). + +## Reference + +- MariaDB ColumnStore documentation: [https://mariadb.com/kb/en/mariadb-columnstore/](https://mariadb.com/kb/en/mariadb-columnstore/) +- MariaDB JDBC driver (Connector/J) reference: [https://mariadb.com/docs/clients/mariadb-connectors/connector-j/](https://mariadb.com/docs/clients/mariadb-connectors/connector-j/) diff --git a/airbyte-integrations/connectors/destination-mariadb-columnstore/build.gradle b/airbyte-integrations/connectors/destination-mariadb-columnstore/build.gradle new file mode 100644 index 0000000..8d545e9 --- /dev/null +++ b/airbyte-integrations/connectors/destination-mariadb-columnstore/build.gradle @@ -0,0 +1,32 @@ +plugins { + id 'application' + id 'airbyte-java-connector' +} + +airbyteJavaConnector { + cdkVersionRequired = '0.2.0' + features = ['db-destinations'] + useLocalCdk = false +} + +//remove once upgrading the CDK version to 0.4.x or later +java { + compileJava { + options.compilerArgs.remove("-Werror") + } +} + +airbyteJavaConnector.addCdkDependencies() + +application { + mainClass = 'io.airbyte.integrations.destination.mariadb_columnstore.MariadbColumnstoreDestination' + applicationDefaultJvmArgs = ['-XX:+ExitOnOutOfMemoryError', '-XX:MaxRAMPercentage=75.0'] +} + +dependencies { + + implementation 'org.mariadb.jdbc:mariadb-java-client:2.7.4' + implementation 'com.vdurmont:semver4j:3.1.0' + + integrationTestJavaImplementation libs.testcontainers.mariadb +} diff --git a/airbyte-integrations/connectors/destination-mariadb-columnstore/icon.svg b/airbyte-integrations/connectors/destination-mariadb-columnstore/icon.svg new file mode 100644 index 0000000..b65a725 --- /dev/null +++ b/airbyte-integrations/connectors/destination-mariadb-columnstore/icon.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/airbyte-integrations/connectors/destination-mariadb-columnstore/metadata.yaml b/airbyte-integrations/connectors/destination-mariadb-columnstore/metadata.yaml new file mode 100644 index 0000000..049d4f5 --- /dev/null +++ b/airbyte-integrations/connectors/destination-mariadb-columnstore/metadata.yaml @@ -0,0 +1,24 @@ +data: + connectorSubtype: database + connectorType: destination + definitionId: 294a4790-429b-40ae-9516-49826b9702e1 + dockerImageTag: 0.1.7 + dockerRepository: airbyte/destination-mariadb-columnstore + githubIssueLabel: destination-mariadb-columnstore + icon: mariadb.svg + license: MIT + name: MariaDB ColumnStore + registries: + cloud: + enabled: false + oss: + enabled: false + releaseStage: alpha + documentationUrl: https://docs.airbyte.com/integrations/destinations/mariadb-columnstore + tags: + - language:java + ab_internal: + sl: 100 + ql: 100 + supportLevel: archived +metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/destination-mariadb-columnstore/src/main/java/io/airbyte/integrations/destination/mariadb_columnstore/MariadbColumnstoreDestination.java b/airbyte-integrations/connectors/destination-mariadb-columnstore/src/main/java/io/airbyte/integrations/destination/mariadb_columnstore/MariadbColumnstoreDestination.java new file mode 100644 index 0000000..af3dee3 --- /dev/null +++
b/airbyte-integrations/connectors/destination-mariadb-columnstore/src/main/java/io/airbyte/integrations/destination/mariadb_columnstore/MariadbColumnstoreDestination.java @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.mariadb_columnstore; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import io.airbyte.cdk.db.factory.DataSourceFactory; +import io.airbyte.cdk.db.factory.DatabaseDriver; +import io.airbyte.cdk.db.jdbc.JdbcDatabase; +import io.airbyte.cdk.db.jdbc.JdbcUtils; +import io.airbyte.cdk.integrations.base.Destination; +import io.airbyte.cdk.integrations.base.IntegrationRunner; +import io.airbyte.cdk.integrations.base.ssh.SshWrappedDestination; +import io.airbyte.cdk.integrations.destination.jdbc.AbstractJdbcDestination; +import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.destination.mariadb_columnstore.MariadbColumnstoreSqlOperations.VersionCompatibility; +import io.airbyte.protocol.models.v0.AirbyteConnectionStatus; +import io.airbyte.protocol.models.v0.AirbyteConnectionStatus.Status; +import java.util.Map; +import javax.sql.DataSource; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class MariadbColumnstoreDestination extends AbstractJdbcDestination implements Destination { + + private static final Logger LOGGER = LoggerFactory.getLogger(MariadbColumnstoreDestination.class); + public static final String DRIVER_CLASS = DatabaseDriver.MARIADB.getDriverClassName(); + static final Map<String, String> DEFAULT_JDBC_PARAMETERS = ImmutableMap.of( + "allowLoadLocalInfile", "true"); + + public static Destination sshWrappedDestination() { + return new SshWrappedDestination(new MariadbColumnstoreDestination(), JdbcUtils.HOST_LIST_KEY, JdbcUtils.PORT_LIST_KEY); + } + + public MariadbColumnstoreDestination() { + super(DRIVER_CLASS, new MariadbColumnstoreNameTransformer(), new MariadbColumnstoreSqlOperations()); + } + + @Override + public AirbyteConnectionStatus check(final JsonNode config) { + final DataSource dataSource = getDataSource(config); + try { + final JdbcDatabase database = getDatabase(dataSource); + final MariadbColumnstoreSqlOperations mariadbColumnstoreSqlOperations = (MariadbColumnstoreSqlOperations) getSqlOperations(); + final String outputSchema = getNamingResolver().getIdentifier(config.get(JdbcUtils.DATABASE_KEY).asText()); + + final VersionCompatibility compatibility = mariadbColumnstoreSqlOperations.isCompatibleVersion(database); + if (!compatibility.isCompatible()) { + throw new RuntimeException(String + .format("Your MariaDB Columnstore version %s is not compatible with Airbyte", + compatibility.getVersion())); + } + + mariadbColumnstoreSqlOperations.verifyLocalFileEnabled(database); + + attemptSQLCreateAndDropTableOperations( + outputSchema, + database, + getNamingResolver(), + mariadbColumnstoreSqlOperations); + } catch (final Exception e) { + LOGGER.error("Exception while checking connection: ", e); + return new AirbyteConnectionStatus() + .withStatus(Status.FAILED) + .withMessage("Could not connect with provided configuration.
\n" + e.getMessage()); + } finally { + try { + DataSourceFactory.close(dataSource); + } catch (final Exception e) { + LOGGER.warn("Unable to close data source.", e); + } + } + + return new AirbyteConnectionStatus().withStatus(Status.SUCCEEDED); + } + + @Override + protected Map getDefaultConnectionProperties(final JsonNode config) { + return DEFAULT_JDBC_PARAMETERS; + } + + @Override + public JsonNode toJdbcConfig(final JsonNode config) { + final String jdbcUrl = String.format(DatabaseDriver.MARIADB.getUrlFormatString(), + config.get(JdbcUtils.HOST_KEY).asText(), + config.get(JdbcUtils.PORT_KEY).asInt(), + config.get(JdbcUtils.DATABASE_KEY).asText()); + + final ImmutableMap.Builder configBuilder = ImmutableMap.builder() + .put(JdbcUtils.USERNAME_KEY, config.get(JdbcUtils.USERNAME_KEY).asText()) + .put(JdbcUtils.JDBC_URL_KEY, jdbcUrl); + + if (config.has(JdbcUtils.PASSWORD_KEY)) { + configBuilder.put(JdbcUtils.PASSWORD_KEY, config.get(JdbcUtils.PASSWORD_KEY).asText()); + } + + return Jsons.jsonNode(configBuilder.build()); + } + + public static void main(final String[] args) throws Exception { + final Destination destination = MariadbColumnstoreDestination.sshWrappedDestination(); + LOGGER.info("starting destination: {}", MariadbColumnstoreDestination.class); + new IntegrationRunner(destination).run(args); + LOGGER.info("completed destination: {}", MariadbColumnstoreDestination.class); + } + +} diff --git a/airbyte-integrations/connectors/destination-mariadb-columnstore/src/main/java/io/airbyte/integrations/destination/mariadb_columnstore/MariadbColumnstoreNameTransformer.java b/airbyte-integrations/connectors/destination-mariadb-columnstore/src/main/java/io/airbyte/integrations/destination/mariadb_columnstore/MariadbColumnstoreNameTransformer.java new file mode 100644 index 0000000..c2ac254 --- /dev/null +++ b/airbyte-integrations/connectors/destination-mariadb-columnstore/src/main/java/io/airbyte/integrations/destination/mariadb_columnstore/MariadbColumnstoreNameTransformer.java @@ -0,0 +1,21 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.mariadb_columnstore; + +import io.airbyte.cdk.integrations.destination.StandardNameTransformer; + +public class MariadbColumnstoreNameTransformer extends StandardNameTransformer { + + @Override + public String getIdentifier(final String name) { + return applyDefaultCase(super.getIdentifier(name)); + } + + @Override + public String applyDefaultCase(final String input) { + return input.toLowerCase(); + } + +} diff --git a/airbyte-integrations/connectors/destination-mariadb-columnstore/src/main/java/io/airbyte/integrations/destination/mariadb_columnstore/MariadbColumnstoreSqlOperations.java b/airbyte-integrations/connectors/destination-mariadb-columnstore/src/main/java/io/airbyte/integrations/destination/mariadb_columnstore/MariadbColumnstoreSqlOperations.java new file mode 100644 index 0000000..f1289ce --- /dev/null +++ b/airbyte-integrations/connectors/destination-mariadb-columnstore/src/main/java/io/airbyte/integrations/destination/mariadb_columnstore/MariadbColumnstoreSqlOperations.java @@ -0,0 +1,161 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.mariadb_columnstore; + +import com.vdurmont.semver4j.Semver; +import io.airbyte.cdk.db.jdbc.JdbcDatabase; +import io.airbyte.cdk.integrations.base.JavaBaseConstants; +import io.airbyte.cdk.integrations.destination.jdbc.JdbcSqlOperations; +import io.airbyte.protocol.models.v0.AirbyteRecordMessage; +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public class MariadbColumnstoreSqlOperations extends JdbcSqlOperations { + + private final String MINIMUM_VERSION = "5.5.3"; + Pattern VERSION_PATTERN = Pattern.compile("^(\\d+\\.\\d+\\.\\d+)-MariaDB"); + private boolean isLocalFileEnabled = false; + + @Override + public void insertRecordsInternal(final JdbcDatabase database, + final List records, + final String schemaName, + final String tmpTableName) + throws SQLException { + if (records.isEmpty()) { + return; + } + + verifyLocalFileEnabled(database); + + File tmpFile = null; + Exception primaryException = null; + try { + tmpFile = Files.createTempFile(tmpTableName + "-", ".tmp").toFile(); + writeBatchToFile(tmpFile, records); + + final String query = String.format( + "LOAD DATA LOCAL INFILE %s INTO TABLE %s.%s FIELDS TERMINATED BY ',' ENCLOSED BY '\"' ESCAPED BY '\\\"' LINES TERMINATED BY '\\r\\n'", + String.format("'%s'", tmpFile.getAbsolutePath()), schemaName, tmpTableName); + + database.execute(query); + } catch (final Exception e) { + primaryException = e; + throw new RuntimeException(primaryException); + } finally { + try { + if (tmpFile != null) { + Files.delete(tmpFile.toPath()); + } + } catch (final IOException e) { + if (primaryException != null) + e.addSuppressed(primaryException); + throw new RuntimeException(e); + } + } + } + + @Override + public void executeTransaction(final JdbcDatabase database, final List queries) throws Exception { + database.execute(connection -> { + try (final Statement stmt = connection.createStatement()) { + stmt.addBatch("BEGIN;"); + for (final String query : queries) { + stmt.addBatch(query); + } + stmt.addBatch("COMMIT;"); + stmt.executeBatch(); + } + }); + } + + @Override + public boolean isSchemaRequired() { + return false; + } + + @Override + public String createTableQuery(final JdbcDatabase database, final String schemaName, final String tableName) { + return String.format( + "CREATE TABLE IF NOT EXISTS %s.%s ( \n" + + "%s VARCHAR(256),\n" + + "%s LONGTEXT,\n" + + "%s TIMESTAMP\n" + + ") engine=columnstore;\n", + schemaName, tableName, JavaBaseConstants.COLUMN_NAME_AB_ID, JavaBaseConstants.COLUMN_NAME_DATA, JavaBaseConstants.COLUMN_NAME_EMITTED_AT); + } + + VersionCompatibility isCompatibleVersion(final JdbcDatabase database) throws SQLException { + final Semver version = getVersion(database); + return new VersionCompatibility(version, version.isGreaterThanOrEqualTo(MINIMUM_VERSION)); + } + + private Semver getVersion(final JdbcDatabase database) throws SQLException { + final List versions = database.queryStrings( + connection -> connection.createStatement().executeQuery("SELECT version()"), + resultSet -> resultSet.getString("version()")); + + final Matcher matcher = VERSION_PATTERN.matcher(versions.get(0)); + if (matcher.find()) { + return new Semver(matcher.group(1)); + } else { + throw new RuntimeException(String.format("Unexpected version string: %s\nExpected version format is X.X.X-MariaDB", versions.get(0))); + } + 
} + + void verifyLocalFileEnabled(final JdbcDatabase database) throws SQLException { + final boolean localFileEnabled = isLocalFileEnabled || checkIfLocalFileIsEnabled(database); + if (!localFileEnabled) { + tryEnableLocalFile(database); + } + isLocalFileEnabled = true; + } + + private boolean checkIfLocalFileIsEnabled(final JdbcDatabase database) throws SQLException { + final List localFiles = database.queryStrings( + connection -> connection.createStatement().executeQuery("SHOW GLOBAL VARIABLES LIKE 'local_infile'"), + resultSet -> resultSet.getString("Value")); + return localFiles.get(0).equalsIgnoreCase("on"); + } + + private void tryEnableLocalFile(final JdbcDatabase database) throws SQLException { + database.execute(connection -> { + try (final Statement statement = connection.createStatement()) { + statement.execute("SET GLOBAL local_infile=true"); + } catch (final Exception e) { + throw new RuntimeException( + "The DB user provided to airbyte was unable to switch on the local_infile attribute on the MariaDB server. As an admin user, you will need to run \"SET GLOBAL local_infile = true\" before syncing data with Airbyte.", + e); + } + }); + } + + public static class VersionCompatibility { + + private final Semver version; + private final boolean isCompatible; + + public VersionCompatibility(final Semver version, final boolean isCompatible) { + this.version = version; + this.isCompatible = isCompatible; + } + + public Semver getVersion() { + return version; + } + + public boolean isCompatible() { + return isCompatible; + } + + } + +} diff --git a/airbyte-integrations/connectors/destination-mariadb-columnstore/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-mariadb-columnstore/src/main/resources/spec.json new file mode 100644 index 0000000..163d9f4 --- /dev/null +++ b/airbyte-integrations/connectors/destination-mariadb-columnstore/src/main/resources/spec.json @@ -0,0 +1,57 @@ +{ + "documentationUrl": "https://docs.airbyte.com/integrations/destinations/mariadb-columnstore", + "supportsIncremental": true, + "supportsNormalization": false, + "supportsDBT": false, + "supported_destination_sync_modes": ["overwrite", "append"], + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "MariaDB Columnstore Destination Spec", + "type": "object", + "required": ["host", "port", "username", "database"], + "additionalProperties": true, + "properties": { + "host": { + "title": "Host", + "description": "The Hostname of the database.", + "type": "string", + "order": 0 + }, + "port": { + "title": "Port", + "description": "The Port of the database.", + "type": "integer", + "minimum": 0, + "maximum": 65536, + "default": 3306, + "examples": ["3306"], + "order": 1 + }, + "database": { + "title": "Database", + "description": "Name of the database.", + "type": "string", + "order": 2 + }, + "username": { + "title": "Username", + "description": "The Username which is used to access the database.", + "type": "string", + "order": 3 + }, + "password": { + "title": "Password", + "description": "The Password associated with the username.", + "type": "string", + "airbyte_secret": true, + "order": 4 + }, + "jdbc_url_params": { + "description": "Additional properties to pass to the JDBC URL string when connecting to the database formatted as 'key=value' pairs separated by the symbol '&'. 
(example: key1=value1&key2=value2&key3=value3).", + "title": "JDBC URL Params", + "type": "string", + "order": 5 + } + } + } +} diff --git a/airbyte-integrations/connectors/destination-mariadb-columnstore/src/test-integration/java/io/airbyte/integrations/destination/mariadb_columnstore/MariaDbTestDataComparator.java b/airbyte-integrations/connectors/destination-mariadb-columnstore/src/test-integration/java/io/airbyte/integrations/destination/mariadb_columnstore/MariaDbTestDataComparator.java new file mode 100644 index 0000000..73da2d5 --- /dev/null +++ b/airbyte-integrations/connectors/destination-mariadb-columnstore/src/test-integration/java/io/airbyte/integrations/destination/mariadb_columnstore/MariaDbTestDataComparator.java @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.mariadb_columnstore; + +import io.airbyte.cdk.integrations.destination.StandardNameTransformer; +import io.airbyte.cdk.integrations.standardtest.destination.comparator.AdvancedTestDataComparator; +import java.util.ArrayList; +import java.util.List; + +public class MariaDbTestDataComparator extends AdvancedTestDataComparator { + + private final StandardNameTransformer namingResolver = new MariadbColumnstoreNameTransformer(); + + @Override + protected List resolveIdentifier(final String identifier) { + final List result = new ArrayList<>(); + final String resolved = namingResolver.getIdentifier(identifier); + result.add(identifier); + result.add(resolved); + + return result; + } + +} diff --git a/airbyte-integrations/connectors/destination-mariadb-columnstore/src/test-integration/java/io/airbyte/integrations/destination/mariadb_columnstore/MariadbColumnstoreDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-mariadb-columnstore/src/test-integration/java/io/airbyte/integrations/destination/mariadb_columnstore/MariadbColumnstoreDestinationAcceptanceTest.java new file mode 100644 index 0000000..1e07845 --- /dev/null +++ b/airbyte-integrations/connectors/destination-mariadb-columnstore/src/test-integration/java/io/airbyte/integrations/destination/mariadb_columnstore/MariadbColumnstoreDestinationAcceptanceTest.java @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.mariadb_columnstore; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.google.common.collect.ImmutableMap; +import io.airbyte.cdk.db.factory.DataSourceFactory; +import io.airbyte.cdk.db.factory.DatabaseDriver; +import io.airbyte.cdk.db.jdbc.DefaultJdbcDatabase; +import io.airbyte.cdk.db.jdbc.JdbcDatabase; +import io.airbyte.cdk.db.jdbc.JdbcUtils; +import io.airbyte.cdk.integrations.base.JavaBaseConstants; +import io.airbyte.cdk.integrations.destination.StandardNameTransformer; +import io.airbyte.cdk.integrations.standardtest.destination.DestinationAcceptanceTest; +import io.airbyte.cdk.integrations.standardtest.destination.comparator.TestDataComparator; +import io.airbyte.commons.json.Jsons; +import java.sql.SQLException; +import java.util.HashSet; +import java.util.List; +import java.util.stream.Collectors; +import org.testcontainers.containers.MariaDBContainer; +import org.testcontainers.utility.DockerImageName; + +public class MariadbColumnstoreDestinationAcceptanceTest extends DestinationAcceptanceTest { + + private final StandardNameTransformer namingResolver = new MariadbColumnstoreNameTransformer(); + + private MariaDBContainer db; + + @Override + protected boolean implementsNamespaces() { + return true; + } + + @Override + protected String getImageName() { + return "airbyte/destination-mariadb-columnstore:dev"; + } + + @Override + protected JsonNode getConfig() { + return Jsons.jsonNode(ImmutableMap.builder() + .put(JdbcUtils.HOST_KEY, db.getHost()) + .put(JdbcUtils.PORT_KEY, db.getFirstMappedPort()) + .put(JdbcUtils.DATABASE_KEY, db.getDatabaseName()) + .put(JdbcUtils.USERNAME_KEY, db.getUsername()) + .put(JdbcUtils.PASSWORD_KEY, db.getPassword()) + .build()); + } + + @Override + protected JsonNode getFailCheckConfig() { + final JsonNode clone = Jsons.clone(getConfig()); + ((ObjectNode) clone).put(JdbcUtils.PASSWORD_KEY, "wrong password"); + return clone; + } + + @Override + protected String getDefaultSchema(final JsonNode config) { + if (config.get(JdbcUtils.DATABASE_KEY) == null) { + return null; + } + return config.get(JdbcUtils.DATABASE_KEY).asText(); + } + + @Override + protected TestDataComparator getTestDataComparator() { + return new MariaDbTestDataComparator(); + } + + @Override + protected boolean supportBasicDataTypeTest() { + return true; + } + + @Override + protected boolean supportArrayDataTypeTest() { + return true; + } + + @Override + protected boolean supportObjectDataTypeTest() { + return true; + } + + @Override + protected List retrieveRecords(final TestDestinationEnv testEnv, + final String streamName, + final String namespace, + final JsonNode streamSchema) + throws Exception { + return retrieveRecordsFromTable(namingResolver.getRawTableName(streamName), namespace) + .stream() + .map(r -> Jsons.deserialize(r.get(JavaBaseConstants.COLUMN_NAME_DATA).asText())) + .collect(Collectors.toList()); + } + + private List retrieveRecordsFromTable(final String tableName, final String schemaName) throws SQLException { + final JdbcDatabase database = getDatabase(getConfig()); + final String query = String.format("SELECT * FROM %s.%s ORDER BY %s ASC;", schemaName, tableName, JavaBaseConstants.COLUMN_NAME_EMITTED_AT); + return database.queryJsons(query); + } + + private static JdbcDatabase getDatabase(final JsonNode config) { + return new DefaultJdbcDatabase( + DataSourceFactory.create( + config.get(JdbcUtils.USERNAME_KEY).asText(), + 
config.has(JdbcUtils.PASSWORD_KEY) ? config.get(JdbcUtils.PASSWORD_KEY).asText() : null, + MariadbColumnstoreDestination.DRIVER_CLASS, + String.format(DatabaseDriver.MARIADB.getUrlFormatString(), + config.get(JdbcUtils.HOST_KEY).asText(), + config.get(JdbcUtils.PORT_KEY).asInt(), + config.get(JdbcUtils.DATABASE_KEY).asText()))); + } + + @Override + protected void setup(final TestDestinationEnv testEnv, HashSet TEST_SCHEMAS) throws Exception { + final DockerImageName mcsImage = DockerImageName.parse("fengdi/columnstore:1.5.2").asCompatibleSubstituteFor("mariadb"); + db = new MariaDBContainer(mcsImage); + db.start(); + + final String createUser = String.format("CREATE USER '%s'@'%%' IDENTIFIED BY '%s';", db.getUsername(), db.getPassword()); + final String grantAll = String.format("GRANT ALL PRIVILEGES ON *.* TO '%s'@'%%' IDENTIFIED BY '%s';", db.getUsername(), db.getPassword()); + final String createDb = String.format("CREATE DATABASE %s DEFAULT CHARSET = utf8;", db.getDatabaseName()); + db.execInContainer("mariadb", "-e", createUser + grantAll + createDb); + } + + @Override + protected void tearDown(final TestDestinationEnv testEnv) { + db.stop(); + db.close(); + } + +} diff --git a/airbyte-integrations/connectors/destination-mariadb-columnstore/src/test-integration/java/io/airbyte/integrations/destination/mariadb_columnstore/SshKeyMariadbColumnstoreDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-mariadb-columnstore/src/test-integration/java/io/airbyte/integrations/destination/mariadb_columnstore/SshKeyMariadbColumnstoreDestinationAcceptanceTest.java new file mode 100644 index 0000000..7d7b623 --- /dev/null +++ b/airbyte-integrations/connectors/destination-mariadb-columnstore/src/test-integration/java/io/airbyte/integrations/destination/mariadb_columnstore/SshKeyMariadbColumnstoreDestinationAcceptanceTest.java @@ -0,0 +1,16 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.mariadb_columnstore; + +import io.airbyte.cdk.integrations.base.ssh.SshTunnel; + +public class SshKeyMariadbColumnstoreDestinationAcceptanceTest extends SshMariadbColumnstoreDestinationAcceptanceTest { + + @Override + public SshTunnel.TunnelMethod getTunnelMethod() { + return SshTunnel.TunnelMethod.SSH_KEY_AUTH; + } + +} diff --git a/airbyte-integrations/connectors/destination-mariadb-columnstore/src/test-integration/java/io/airbyte/integrations/destination/mariadb_columnstore/SshMariadbColumnstoreDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-mariadb-columnstore/src/test-integration/java/io/airbyte/integrations/destination/mariadb_columnstore/SshMariadbColumnstoreDestinationAcceptanceTest.java new file mode 100644 index 0000000..898aa50 --- /dev/null +++ b/airbyte-integrations/connectors/destination-mariadb-columnstore/src/test-integration/java/io/airbyte/integrations/destination/mariadb_columnstore/SshMariadbColumnstoreDestinationAcceptanceTest.java @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.mariadb_columnstore; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import io.airbyte.cdk.db.Database; +import io.airbyte.cdk.db.factory.DSLContextFactory; +import io.airbyte.cdk.db.factory.DatabaseDriver; +import io.airbyte.cdk.db.jdbc.JdbcUtils; +import io.airbyte.cdk.integrations.base.JavaBaseConstants; +import io.airbyte.cdk.integrations.base.ssh.SshBastionContainer; +import io.airbyte.cdk.integrations.base.ssh.SshTunnel; +import io.airbyte.cdk.integrations.destination.StandardNameTransformer; +import io.airbyte.cdk.integrations.standardtest.destination.DestinationAcceptanceTest; +import io.airbyte.commons.functional.CheckedFunction; +import io.airbyte.commons.json.Jsons; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.stream.Collectors; +import org.jooq.DSLContext; +import org.jooq.SQLDialect; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.testcontainers.containers.MariaDBContainer; +import org.testcontainers.containers.Network; +import org.testcontainers.utility.DockerImageName; + +/** + * Abstract class that allows us to avoid duplicating testing logic for testing SSH with a key file + * or with a password. + */ +public abstract class SshMariadbColumnstoreDestinationAcceptanceTest extends DestinationAcceptanceTest { + + private static final Logger LOGGER = LoggerFactory.getLogger(MariadbColumnstoreDestinationAcceptanceTest.class); + private static final Network network = Network.newNetwork(); + + private final StandardNameTransformer namingResolver = new MariadbColumnstoreNameTransformer(); + + private JsonNode configJson; + + private MariaDBContainer db; + + private final SshBastionContainer bastion = new SshBastionContainer(); + + public abstract SshTunnel.TunnelMethod getTunnelMethod(); + + @Override + protected boolean implementsNamespaces() { + return true; + } + + @Override + protected String getImageName() { + return "airbyte/destination-mariadb-columnstore:dev"; + } + + @Override + protected JsonNode getConfig() throws Exception { + return bastion.getTunnelConfig(getTunnelMethod(), bastion.getBasicDbConfigBuider(db), false); + } + + @Override + protected JsonNode getFailCheckConfig() throws Exception { + final JsonNode clone = Jsons.clone(getConfig()); + ((ObjectNode) clone).put("password", "wrong password"); + return clone; + } + + @Override + protected String getDefaultSchema(final JsonNode config) { + if (config.get(JdbcUtils.DATABASE_KEY) == null) { + return null; + } + return config.get(JdbcUtils.DATABASE_KEY).asText(); + } + + @Override + protected List retrieveRecords(final TestDestinationEnv testEnv, + final String streamName, + final String namespace, + final JsonNode streamSchema) + throws Exception { + return retrieveRecordsFromTable(namingResolver.getRawTableName(streamName), namespace) + .stream() + .map(r -> Jsons.deserialize(r.get(JavaBaseConstants.COLUMN_NAME_DATA).asText())) + .collect(Collectors.toList()); + } + + private List retrieveRecordsFromTable(final String tableName, final String schemaName) throws Exception { + final JsonNode config = getConfig(); + return SshTunnel.sshWrap( + config, + JdbcUtils.HOST_LIST_KEY, + JdbcUtils.PORT_LIST_KEY, + (CheckedFunction, Exception>) mangledConfig -> getDatabaseFromConfig(mangledConfig) + .query( + ctx -> ctx + .fetch(String.format("SELECT * FROM %s.%s ORDER BY %s ASC;", schemaName, tableName, 
JavaBaseConstants.COLUMN_NAME_EMITTED_AT)) + .stream() + .map(r -> r.formatJSON(JdbcUtils.getDefaultJSONFormat())) + .map(Jsons::deserialize) + .collect(Collectors.toList()))); + } + + private static Database getDatabaseFromConfig(final JsonNode config) { + final DSLContext dslContext = DSLContextFactory.create( + config.get(JdbcUtils.USERNAME_KEY).asText(), + config.get(JdbcUtils.PASSWORD_KEY).asText(), + DatabaseDriver.MARIADB.getDriverClassName(), + String.format(DatabaseDriver.MARIADB.getUrlFormatString(), + config.get(JdbcUtils.HOST_KEY).asText(), + config.get(JdbcUtils.PORT_KEY).asInt(), + config.get(JdbcUtils.DATABASE_KEY).asText()), + SQLDialect.MARIADB); + return new Database(dslContext); + } + + @Override + protected List resolveIdentifier(final String identifier) { + final List result = new ArrayList<>(); + final String resolved = namingResolver.getIdentifier(identifier); + result.add(identifier); + result.add(resolved); + + return result; + } + + @Override + protected void setup(final TestDestinationEnv testEnv, final HashSet TEST_SCHEMAS) throws Exception { + bastion.initAndStartBastion(network); + startAndInitJdbcContainer(); + } + + private void startAndInitJdbcContainer() throws Exception { + final DockerImageName mcsImage = DockerImageName.parse("fengdi/columnstore:1.5.2").asCompatibleSubstituteFor("mariadb"); + db = new MariaDBContainer<>(mcsImage) + .withNetwork(network); + db.start(); + + final String createUser = String.format("CREATE USER '%s'@'%%' IDENTIFIED BY '%s';", db.getUsername(), db.getPassword()); + final String grantAll = String.format("GRANT ALL PRIVILEGES ON *.* TO '%s'@'%%' IDENTIFIED BY '%s';", db.getUsername(), db.getPassword()); + final String createDb = String.format("CREATE DATABASE %s DEFAULT CHARSET = utf8;", db.getDatabaseName()); + db.execInContainer("mariadb", "-e", createUser + grantAll + createDb); + } + + @Override + protected void tearDown(final TestDestinationEnv testEnv) { + bastion.stopAndCloseContainers(db); + } + +} diff --git a/airbyte-integrations/connectors/destination-mariadb-columnstore/src/test-integration/java/io/airbyte/integrations/destination/mariadb_columnstore/SshPasswordMariadbColumnstoreDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-mariadb-columnstore/src/test-integration/java/io/airbyte/integrations/destination/mariadb_columnstore/SshPasswordMariadbColumnstoreDestinationAcceptanceTest.java new file mode 100644 index 0000000..89c7ca6 --- /dev/null +++ b/airbyte-integrations/connectors/destination-mariadb-columnstore/src/test-integration/java/io/airbyte/integrations/destination/mariadb_columnstore/SshPasswordMariadbColumnstoreDestinationAcceptanceTest.java @@ -0,0 +1,16 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.mariadb_columnstore; + +import io.airbyte.cdk.integrations.base.ssh.SshTunnel; + +public class SshPasswordMariadbColumnstoreDestinationAcceptanceTest extends SshMariadbColumnstoreDestinationAcceptanceTest { + + @Override + public SshTunnel.TunnelMethod getTunnelMethod() { + return SshTunnel.TunnelMethod.SSH_PASSWORD_AUTH; + } + +} diff --git a/airbyte-integrations/connectors/destination-mariadb-columnstore/src/test/java/io/airbyte/integrations/destination/mariadb_columnstore/MariadbColumnstoreDestinationTest.java b/airbyte-integrations/connectors/destination-mariadb-columnstore/src/test/java/io/airbyte/integrations/destination/mariadb_columnstore/MariadbColumnstoreDestinationTest.java new file mode 100644 index 0000000..11505ed --- /dev/null +++ b/airbyte-integrations/connectors/destination-mariadb-columnstore/src/test/java/io/airbyte/integrations/destination/mariadb_columnstore/MariadbColumnstoreDestinationTest.java @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.mariadb_columnstore; + +import static org.junit.jupiter.api.Assertions.*; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.junit.jupiter.api.Test; + +public class MariadbColumnstoreDestinationTest { + + @Test + public void testToJdbcConfig() throws Exception { + final MariadbColumnstoreDestination dest = new MariadbColumnstoreDestination(); + String configJson = "{\"host\": \"localhost\", \"port\": 3306, \"database\": \"test\", \"username\": \"root\", \"password\": \"secret\"}"; + String expectedJson = "{\"username\": \"root\", \"password\": \"secret\", \"jdbc_url\": \"jdbc:mariadb://localhost:3306/test\"}"; + ObjectMapper mapper = new ObjectMapper(); + JsonNode config = mapper.readTree(configJson); + + JsonNode actual = dest.toJdbcConfig(config); + JsonNode expected = mapper.readTree(expectedJson); + + assertEquals(expected, actual); + } + +} diff --git a/airbyte-integrations/connectors/destination-mariadb-columnstore/src/test/java/io/airbyte/integrations/destination/mariadb_columnstore/MariadbSpecTest.java b/airbyte-integrations/connectors/destination-mariadb-columnstore/src/test/java/io/airbyte/integrations/destination/mariadb_columnstore/MariadbSpecTest.java new file mode 100644 index 0000000..5cfea9f --- /dev/null +++ b/airbyte-integrations/connectors/destination-mariadb-columnstore/src/test/java/io/airbyte/integrations/destination/mariadb_columnstore/MariadbSpecTest.java @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.mariadb_columnstore; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import io.airbyte.commons.io.IOs; +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.resources.MoreResources; +import io.airbyte.protocol.models.v0.ConnectorSpecification; +import io.airbyte.validation.json.JsonSchemaValidator; +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +public class MariadbSpecTest { + + private static JsonNode schema; + private static JsonNode config; + private static String configText; + private static JsonSchemaValidator validator; + + @BeforeAll + static void init() throws IOException { + configText = """ + { + "host": "localhost", + "port": 1521, + "username": "mariadb", + "password": "password", + "database": "db", + "jdbc_url_params": "property1=pValue1&property2=pValue2" + } + """; + final String spec = MoreResources.readResource("spec.json"); + final File schemaFile = IOs.writeFile(Files.createTempDirectory(Path.of("/tmp"), "spec-test"), "schema.json", spec).toFile(); + schema = JsonSchemaValidator.getSchema(schemaFile).get("connectionSpecification"); + validator = new JsonSchemaValidator(); + } + + @BeforeEach + void beforeEach() { + config = Jsons.deserialize(configText); + } + + @Test + void testHostMissing() { + ((ObjectNode) config).remove("host"); + assertFalse(validator.test(schema, config)); + } + + @Test + void testPortMissing() { + ((ObjectNode) config).remove("port"); + assertFalse(validator.test(schema, config)); + } + + @Test + void testDatabaseMissing() { + ((ObjectNode) config).remove("database"); + assertFalse(validator.test(schema, config)); + } + + @Test + void testUsernameMissing() { + ((ObjectNode) config).remove("username"); + assertFalse(validator.test(schema, config)); + } + + @Test + void testAdditionalJdbcParamMissing() { + ((ObjectNode) config).remove("jdbc_url_params"); + assertTrue(validator.test(schema, config)); + } + + @Test + void testWithJdbcAdditionalProperty() { + assertTrue(validator.test(schema, config)); + } + + @Test + void testJdbcAdditionalProperty() throws Exception { + final ConnectorSpecification spec = new MariadbColumnstoreDestination().spec(); + assertNotNull(spec.getConnectionSpecification().get("properties").get("jdbc_url_params")); + } + +} diff --git a/airbyte-integrations/connectors/destination-meilisearch/.dockerignore b/airbyte-integrations/connectors/destination-meilisearch/.dockerignore new file mode 100644 index 0000000..6d35a84 --- /dev/null +++ b/airbyte-integrations/connectors/destination-meilisearch/.dockerignore @@ -0,0 +1,5 @@ +* +!Dockerfile +!main.py +!destination_meilisearch +!setup.py diff --git a/airbyte-integrations/connectors/destination-meilisearch/Dockerfile b/airbyte-integrations/connectors/destination-meilisearch/Dockerfile new file mode 100644 index 0000000..00bbfb9 --- /dev/null +++ b/airbyte-integrations/connectors/destination-meilisearch/Dockerfile @@ -0,0 +1,38 @@ +FROM python:3.9.11-alpine3.15 as base + +# build and load all requirements +FROM base as builder +WORKDIR /airbyte/integration_code + +# upgrade pip to the latest 
version +RUN apk --no-cache upgrade \ + && pip install --upgrade pip \ + && apk --no-cache add tzdata build-base + + +COPY setup.py ./ +# install necessary packages to a temporary folder +RUN pip install --prefix=/install . + +# build a clean environment +FROM base +WORKDIR /airbyte/integration_code + +# copy all loaded and built libraries to a pure basic image +COPY --from=builder /install /usr/local +# add default timezone settings +COPY --from=builder /usr/share/zoneinfo/Etc/UTC /etc/localtime +RUN echo "Etc/UTC" > /etc/timezone + +# bash is installed for more convenient debugging. +RUN apk --no-cache add bash + +# copy payload code only +COPY main.py ./ +COPY destination_meilisearch ./destination_meilisearch + +ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" +ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] + +LABEL io.airbyte.version=1.0.1 +LABEL io.airbyte.name=airbyte/destination-meilisearch diff --git a/airbyte-integrations/connectors/destination-meilisearch/README.md b/airbyte-integrations/connectors/destination-meilisearch/README.md new file mode 100644 index 0000000..207e289 --- /dev/null +++ b/airbyte-integrations/connectors/destination-meilisearch/README.md @@ -0,0 +1,99 @@ +# Meilisearch Destination + +This is the repository for the Meilisearch destination connector, written in Python. +For information about how to use this connector within Airbyte, see [the documentation](https://docs.airbyte.io/integrations/destinations/meilisearch). + +## Local development + +### Prerequisites +**To iterate on this connector, make sure to complete this prerequisites section.** + +#### Minimum Python version required `= 3.7.0` + +#### Build & Activate Virtual Environment and install dependencies +From this connector directory, create a virtual environment: +``` +python -m venv .venv +``` + +This will generate a virtualenv for this module in `.venv/`. Make sure this venv is active in your +development environment of choice. To activate it from the terminal, run: +``` +source .venv/bin/activate +pip install -r requirements.txt +``` +If you are in an IDE, follow your IDE's instructions to activate the virtualenv. + +Note that while we are installing dependencies from `requirements.txt`, you should only edit `setup.py` for your dependencies. `requirements.txt` is +used for editable installs (`pip install -e`) to pull in Python dependencies from the monorepo and will call `setup.py`. +If this is mumbo jumbo to you, don't worry about it, just put your deps in `setup.py` but install using `pip install -r requirements.txt` and everything +should work as you expect. + +#### Create credentials +**If you are a community contributor**, follow the instructions in the [documentation](https://docs.airbyte.io/integrations/destinations/meilisearch) +to generate the necessary credentials. Then create a file `secrets/config.json` conforming to the `destination_meilisearch/spec.json` file. +Note that the `secrets` directory is gitignored by default, so there is no danger of accidentally checking in sensitive information. +See `integration_tests/sample_config.json` for a sample config file. + +**If you are an Airbyte core member**, copy the credentials in Lastpass under the secret name `destination meilisearch test creds` +and place them into `secrets/config.json`. 
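+
+For reference, a minimal `secrets/config.json` might look like the following sketch. The field names come from `destination_meilisearch/spec.json` later in this diff; the values are placeholders, not real credentials:
+```json
+{
+  "host": "http://localhost:7700",
+  "api_key": "<your MeiliSearch API key>"
+}
+```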
+ +### Locally running the connector +``` +python main.py spec +python main.py check --config secrets/config.json +python main.py discover --config secrets/config.json +python main.py read --config secrets/config.json --catalog integration_tests/configured_catalog.json +``` + +### Locally running the connector docker image + + +#### Build +**Via [`airbyte-ci`](https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/pipelines/README.md) (recommended):** +```bash +airbyte-ci connectors --name=destination-meilisearch build +``` + +An image will be built with the tag `airbyte/destination-meilisearch:dev`. + +**Via `docker build`:** +```bash +docker build -t airbyte/destination-meilisearch:dev . +``` + +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/destination-meilisearch:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-meilisearch:dev check --config /secrets/config.json +# messages.jsonl is a file containing line-separated JSON representing AirbyteMessages +cat messages.jsonl | docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/destination-meilisearch:dev write --config /secrets/config.json --catalog /integration_tests/configured_catalog.json +``` + +## Testing +You can run our full test suite locally using [`airbyte-ci`](https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/pipelines/README.md): +```bash +airbyte-ci connectors --name=destination-meilisearch test +``` + +### Customizing acceptance Tests +Customize `acceptance-test-config.yml` file to configure tests. See [Connector Acceptance Tests](https://docs.airbyte.com/connector-development/testing-connectors/connector-acceptance-tests-reference) for more information. +If your connector requires to create or destroy resources for use during acceptance tests create fixtures for it and place them inside integration_tests/acceptance.py. + +## Dependency Management +All of your dependencies should go in `setup.py`, NOT `requirements.txt`. The requirements file is only used to connect internal Airbyte dependencies in the monorepo for local development. +We split dependencies between two groups, dependencies that are: +* required for your connector to work need to go to `MAIN_REQUIREMENTS` list. +* required for the testing need to go to `TEST_REQUIREMENTS` list + +### Publishing a new version of the connector +You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what? +1. Make sure your changes are passing our test suite: `airbyte-ci connectors --name=destination-meilisearch test` +2. Bump the connector version in `metadata.yaml`: increment the `dockerImageTag` value. Please follow [semantic versioning for connectors](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#semantic-versioning-for-connectors). +3. Make sure the `metadata.yaml` content is up to date. +4. Make the connector documentation and its changelog is up to date (`docs/integrations/destinations/meilisearch.md`). +5. Create a Pull Request: use [our PR naming conventions](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#pull-request-title-convention). +6. Pat yourself on the back for being an awesome contributor. +7. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master. 
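+
+As noted in the Run section above, `messages.jsonl` contains line-separated JSON AirbyteMessages. A minimal sketch of one such line (the stream and field names are illustrative and must match your configured catalog, as in `integration_tests/messages.jsonl` below) is:
+```json
+{"type": "RECORD", "record": {"stream": "ab-airbyte-testing", "data": {"_ab_pk": "my_value", "column2": 221}, "emitted_at": 1626172757000}}
+```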
+ diff --git a/airbyte-integrations/connectors/destination-meilisearch/destination_meilisearch/__init__.py b/airbyte-integrations/connectors/destination-meilisearch/destination_meilisearch/__init__.py new file mode 100644 index 0000000..f83a392 --- /dev/null +++ b/airbyte-integrations/connectors/destination-meilisearch/destination_meilisearch/__init__.py @@ -0,0 +1,8 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +from .destination import DestinationMeilisearch + +__all__ = ["DestinationMeilisearch"] diff --git a/airbyte-integrations/connectors/destination-meilisearch/destination_meilisearch/destination.py b/airbyte-integrations/connectors/destination-meilisearch/destination_meilisearch/destination.py new file mode 100644 index 0000000..32d08b7 --- /dev/null +++ b/airbyte-integrations/connectors/destination-meilisearch/destination_meilisearch/destination.py @@ -0,0 +1,84 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +from logging import Logger, getLogger +from typing import Any, Dict, Iterable, Mapping + +from airbyte_cdk.destinations import Destination +from airbyte_cdk.models import AirbyteConnectionStatus, AirbyteMessage, ConfiguredAirbyteCatalog, DestinationSyncMode, Status, Type +from destination_meilisearch.writer import MeiliWriter +from meilisearch import Client + +logger = getLogger("airbyte") + + +def get_client(config: Mapping[str, Any]) -> Client: + host = config.get("host") + api_key = config.get("api_key") + return Client(host, api_key) + + +class DestinationMeilisearch(Destination): + primary_key = "_ab_pk" + + def _flush_streams(self, streams: Dict[str, MeiliWriter]) -> Iterable[AirbyteMessage]: + for stream in streams: + streams[stream].flush() + + def write( + self, config: Mapping[str, Any], configured_catalog: ConfiguredAirbyteCatalog, input_messages: Iterable[AirbyteMessage] + ) -> Iterable[AirbyteMessage]: + client = get_client(config=config) + # Creating Meilisearch writers + writers = {s.stream.name: MeiliWriter(client, s.stream.name, self.primary_key) for s in configured_catalog.streams} + + for configured_stream in configured_catalog.streams: + stream_name = configured_stream.stream.name + # Deleting index in Meilisearch if sync mode is overwite + if configured_stream.destination_sync_mode == DestinationSyncMode.overwrite: + logger.debug(f"Deleting index: {stream_name}.") + client.delete_index(stream_name) + # Creating index in Meilisearch + client.create_index(stream_name, {"primaryKey": self.primary_key}) + logger.debug(f"Creating index: {stream_name}.") + + for message in input_messages: + if message.type == Type.STATE: + yield message + elif message.type == Type.RECORD: + data = message.record.data + stream = message.record.stream + # Skip unselected streams + if stream not in writers: + logger.debug(f"Stream {stream} was not present in configured streams, skipping") + continue + writers[stream].queue_write_operation(data) + else: + logger.info(f"Unhandled message type {message.type}: {message}") + + # Flush any leftover messages + self._flush_streams(writers) + + def check(self, logger: Logger, config: Mapping[str, Any]) -> AirbyteConnectionStatus: + try: + client = get_client(config=config) + + client.create_index("_airbyte", {"primaryKey": "id"}) + + client.index("_airbyte").add_documents( + [ + { + "id": 287947, + "title": "Shazam", + "overview": "A boy is given the ability", + } + ] + ) + + client.delete_index("_airbyte") + return AirbyteConnectionStatus(status=Status.SUCCEEDED) + except Exception as e: + 
logger.error(f"Check connection failed. Error: {e}") + return AirbyteConnectionStatus(status=Status.FAILED, message=f"An exception occurred: {repr(e)}") diff --git a/airbyte-integrations/connectors/destination-meilisearch/destination_meilisearch/spec.json b/airbyte-integrations/connectors/destination-meilisearch/destination_meilisearch/spec.json new file mode 100644 index 0000000..f3fe7aa --- /dev/null +++ b/airbyte-integrations/connectors/destination-meilisearch/destination_meilisearch/spec.json @@ -0,0 +1,27 @@ +{ + "documentationUrl": "https://docs.airbyte.com/integrations/destinations/meilisearch", + "supported_destination_sync_modes": ["overwrite", "append"], + "supportsIncremental": true, + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Destination Meilisearch", + "type": "object", + "required": ["host"], + "additionalProperties": false, + "properties": { + "host": { + "title": "Host", + "description": "Hostname of the MeiliSearch instance.", + "type": "string", + "order": 0 + }, + "api_key": { + "title": "API Key", + "airbyte_secret": true, + "description": "MeiliSearch API Key. See the docs for more information on how to obtain this key.", + "type": "string", + "order": 1 + } + } + } +} diff --git a/airbyte-integrations/connectors/destination-meilisearch/destination_meilisearch/writer.py b/airbyte-integrations/connectors/destination-meilisearch/destination_meilisearch/writer.py new file mode 100644 index 0000000..e2450f8 --- /dev/null +++ b/airbyte-integrations/connectors/destination-meilisearch/destination_meilisearch/writer.py @@ -0,0 +1,39 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + +from collections.abc import Mapping +from logging import getLogger +from uuid import uuid4 + +from meilisearch import Client + +logger = getLogger("airbyte") + + +class MeiliWriter: + flush_interval = 50000 + + def __init__(self, client: Client, stream_name: str, primary_key: str): + self.client = client + self.primary_key = primary_key + self.stream_name: str = stream_name + self._write_buffer = [] + + logger.info(f"Creating MeiliWriter for {self.stream_name}") + + def queue_write_operation(self, data: Mapping): + random_key = str(uuid4()) + self._write_buffer.append({**data, self.primary_key: random_key}) + if len(self._write_buffer) == self.flush_interval: + logger.debug(f"Reached limit size: flushing records for {self.stream_name}") + self.flush() + + def flush(self): + buffer_size = len(self._write_buffer) + if buffer_size == 0: + return + logger.info(f"Flushing {buffer_size} records") + response = self.client.index(self.stream_name).add_documents(self._write_buffer) + self.client.wait_for_task(response.task_uid, 1800000, 1000) + self._write_buffer.clear() diff --git a/airbyte-integrations/connectors/destination-meilisearch/icon.svg b/airbyte-integrations/connectors/destination-meilisearch/icon.svg new file mode 100644 index 0000000..223ab4d --- /dev/null +++ b/airbyte-integrations/connectors/destination-meilisearch/icon.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/airbyte-integrations/connectors/destination-meilisearch/integration_tests/integration_test.py b/airbyte-integrations/connectors/destination-meilisearch/integration_tests/integration_test.py new file mode 100644 index 0000000..1d9687e --- /dev/null +++ b/airbyte-integrations/connectors/destination-meilisearch/integration_tests/integration_test.py @@ -0,0 +1,103 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+# + +import json +import logging +from typing import Any, Dict, Mapping + +import pytest +from airbyte_cdk.models import ( + AirbyteMessage, + AirbyteRecordMessage, + AirbyteStateMessage, + AirbyteStream, + ConfiguredAirbyteCatalog, + ConfiguredAirbyteStream, + DestinationSyncMode, + Status, + SyncMode, + Type, +) +from destination_meilisearch.destination import DestinationMeilisearch, get_client +from meilisearch import Client + + +@pytest.fixture(name="config") +def config_fixture() -> Mapping[str, Any]: + with open("secrets/config.json", "r") as f: + return json.loads(f.read()) + + +@pytest.fixture(name="configured_catalog") +def configured_catalog_fixture() -> ConfiguredAirbyteCatalog: + stream_schema = {"type": "object", "properties": {"string_col": {"type": "str"}, "int_col": {"type": "integer"}}} + + overwrite_stream = ConfiguredAirbyteStream( + stream=AirbyteStream( + name="_airbyte", json_schema=stream_schema, supported_sync_modes=[SyncMode.incremental, SyncMode.full_refresh] + ), + sync_mode=SyncMode.incremental, + destination_sync_mode=DestinationSyncMode.overwrite, + ) + + return ConfiguredAirbyteCatalog(streams=[overwrite_stream]) + + +@pytest.fixture(autouse=True) +def teardown(config: Mapping): + yield + client = get_client(config=config) + client.delete_index("_airbyte") + + +@pytest.fixture(name="client") +def client_fixture(config) -> Client: + client = get_client(config=config) + resp = client.create_index("_airbyte", {"primaryKey": "_ab_pk"}) + client.wait_for_task(_handle_breaking_wait_for_task(resp)) + return client + + +def test_check_valid_config(config: Mapping): + outcome = DestinationMeilisearch().check(logging.getLogger("airbyte"), config) + assert outcome.status == Status.SUCCEEDED + + +def test_check_invalid_config(): + outcome = DestinationMeilisearch().check( + logging.getLogger("airbyte"), {"api_key": "not_a_real_key", "host": "https://www.meilisearch.com"} + ) + assert outcome.status == Status.FAILED + + +def _state(data: Dict[str, Any]) -> AirbyteMessage: + return AirbyteMessage(type=Type.STATE, state=AirbyteStateMessage(data=data)) + + +def _record(stream: str, str_value: str, int_value: int) -> AirbyteMessage: + return AirbyteMessage( + type=Type.RECORD, record=AirbyteRecordMessage(stream=stream, data={"str_col": str_value, "int_col": int_value}, emitted_at=0) + ) + + +def _handle_breaking_wait_for_task(task: Any) -> int: + if type(task) is dict: + return task["taskUid"] + else: + return task.task_uid + + +def records_count(client: Client) -> int: + documents_results = client.index("_airbyte").get_documents() + return documents_results.total + + +def test_write(config: Mapping, configured_catalog: ConfiguredAirbyteCatalog, client: Client): + overwrite_stream = configured_catalog.streams[0].stream.name + first_state_message = _state({"state": "1"}) + first_record_chunk = [_record(overwrite_stream, str(i), i) for i in range(2)] + + destination = DestinationMeilisearch() + list(destination.write(config, configured_catalog, [*first_record_chunk, first_state_message])) + assert records_count(client) == 2 diff --git a/airbyte-integrations/connectors/destination-meilisearch/integration_tests/messages.jsonl b/airbyte-integrations/connectors/destination-meilisearch/integration_tests/messages.jsonl new file mode 100644 index 0000000..e1d0682 --- /dev/null +++ b/airbyte-integrations/connectors/destination-meilisearch/integration_tests/messages.jsonl @@ -0,0 +1,2 @@ +{"type": "RECORD", "record": {"stream": "ab-airbyte-testing", "data": {"_ab_pk": "my_value", 
"column2": 221, "column3": "2021-01-01T20:10:22", "column4": 1.214, "column5": [1,2,3]}, "emitted_at": 1626172757000}} +{"type": "RECORD", "record": {"stream": "ab-airbyte-testing", "data": {"_ab_pk": "my_value2", "column2": 222, "column3": "2021-01-02T22:10:22", "column5": [1,2,null]}, "emitted_at": 1626172757000}} diff --git a/airbyte-integrations/connectors/destination-meilisearch/main.py b/airbyte-integrations/connectors/destination-meilisearch/main.py new file mode 100644 index 0000000..a5eba93 --- /dev/null +++ b/airbyte-integrations/connectors/destination-meilisearch/main.py @@ -0,0 +1,11 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +import sys + +from destination_meilisearch import DestinationMeilisearch + +if __name__ == "__main__": + DestinationMeilisearch().run(sys.argv[1:]) diff --git a/airbyte-integrations/connectors/destination-meilisearch/metadata.yaml b/airbyte-integrations/connectors/destination-meilisearch/metadata.yaml new file mode 100644 index 0000000..1b30cb1 --- /dev/null +++ b/airbyte-integrations/connectors/destination-meilisearch/metadata.yaml @@ -0,0 +1,24 @@ +data: + connectorSubtype: api + connectorType: destination + definitionId: af7c921e-5892-4ff2-b6c1-4a5ab258fb7e + dockerImageTag: 1.0.1 + dockerRepository: airbyte/destination-meilisearch + githubIssueLabel: destination-meilisearch + icon: meilisearch.svg + license: MIT + name: MeiliSearch + registries: + cloud: + enabled: false + oss: + enabled: false + releaseStage: alpha + documentationUrl: https://docs.airbyte.com/integrations/destinations/meilisearch + tags: + - language:python + ab_internal: + sl: 100 + ql: 100 + supportLevel: archived +metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/destination-meilisearch/requirements.txt b/airbyte-integrations/connectors/destination-meilisearch/requirements.txt new file mode 100644 index 0000000..d6e1198 --- /dev/null +++ b/airbyte-integrations/connectors/destination-meilisearch/requirements.txt @@ -0,0 +1 @@ +-e . diff --git a/airbyte-integrations/connectors/destination-meilisearch/sample_files/configured_catalog.json b/airbyte-integrations/connectors/destination-meilisearch/sample_files/configured_catalog.json new file mode 100644 index 0000000..9ac002e --- /dev/null +++ b/airbyte-integrations/connectors/destination-meilisearch/sample_files/configured_catalog.json @@ -0,0 +1,27 @@ +{ + "streams": [ + { + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite", + "stream": { + "name": "ab-airbyte-testing", + "supported_sync_modes": ["full_refresh"], + "source_defined_cursor": false, + "json_schema": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "body": { + "type": "string" + }, + "attributes": { + "type": ["null", "object"] + } + } + } + } + } + ] +} diff --git a/airbyte-integrations/connectors/destination-meilisearch/setup.py b/airbyte-integrations/connectors/destination-meilisearch/setup.py new file mode 100644 index 0000000..9d9bfe3 --- /dev/null +++ b/airbyte-integrations/connectors/destination-meilisearch/setup.py @@ -0,0 +1,23 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+# + + +from setuptools import find_packages, setup + +MAIN_REQUIREMENTS = ["airbyte-cdk", "meilisearch>=0.22.0"] + +TEST_REQUIREMENTS = ["pytest~=6.1"] + +setup( + name="destination_meilisearch", + description="Destination implementation for Meilisearch.", + author="Airbyte", + author_email="contact@airbyte.io", + packages=find_packages(), + install_requires=MAIN_REQUIREMENTS, + package_data={"": ["*.json"]}, + extras_require={ + "tests": TEST_REQUIREMENTS, + }, +) diff --git a/airbyte-integrations/connectors/destination-meilisearch/unit_tests/unit_test.py b/airbyte-integrations/connectors/destination-meilisearch/unit_tests/unit_test.py new file mode 100644 index 0000000..c09a3f7 --- /dev/null +++ b/airbyte-integrations/connectors/destination-meilisearch/unit_tests/unit_test.py @@ -0,0 +1,29 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + +from unittest.mock import patch + +from destination_meilisearch.writer import MeiliWriter + + +@patch("meilisearch.Client") +def test_queue_write_operation(client): + writer = MeiliWriter(client, "stream_name", "primary_key") + writer.queue_write_operation({"a": "a"}) + assert len(writer._write_buffer) == 1 + writer.queue_write_operation({"b": "b"}) + assert len(writer._write_buffer) == 2 + writer2 = MeiliWriter(client, "stream_name2", "primary_key") + writer2.queue_write_operation({"a": "a"}) + assert len(writer2._write_buffer) == 1 + assert len(writer._write_buffer) == 2 + + +@patch("meilisearch.Client") +def test_flush(client): + writer = MeiliWriter(client, "stream_name", "primary_key") + writer.queue_write_operation({"a": "a"}) + writer.flush() + client.index.assert_called_once_with("stream_name") + client.wait_for_task.assert_called_once() diff --git a/airbyte-integrations/connectors/destination-mqtt/README.md b/airbyte-integrations/connectors/destination-mqtt/README.md new file mode 100644 index 0000000..f9d73b5 --- /dev/null +++ b/airbyte-integrations/connectors/destination-mqtt/README.md @@ -0,0 +1,72 @@ +# Destination MQTT + +This is the repository for the MQTT destination connector in Java. +For information about how to use this connector within Airbyte, see [the User Documentation](https://docs.airbyte.io/integrations/destinations/mqtt). + +## Local development + +#### Building via Gradle +From the Airbyte repository root, run: +``` +./gradlew :airbyte-integrations:connectors:destination-mqtt:build +``` + +#### Create credentials +**If you are a community contributor**, generate the necessary credentials and place them in `secrets/config.json` conforming to the spec file in `src/main/resources/spec.json`. +Note that the `secrets` directory is git-ignored by default, so there is no danger of accidentally checking in sensitive information. + +**If you are an Airbyte core member**, follow the [instructions](https://docs.airbyte.io/connector-development#using-credentials-in-ci) to set up the credentials. + +### Locally running the connector docker image + +#### Build +Build the connector image via Gradle: + +``` +./gradlew :airbyte-integrations:connectors:destination-mqtt:buildConnectorImage +``` +Once built, the docker image name and tag on your host will be `airbyte/destination-mqtt:dev`. +the Dockerfile. 
+ +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/destination-mqtt:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-mqtt:dev check --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-mqtt:dev discover --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/destination-mqtt:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json +``` + +## Testing +We use `JUnit` for Java tests. + +### Unit and Integration Tests +Place unit tests under `src/test/io/airbyte/integrations/destinations/mqtt`. + +#### Acceptance Tests +Airbyte has a standard test suite that all destination connectors must pass. Implement the `TODO`s in +`src/test-integration/java/io/airbyte/integrations/destinations/MqttDestinationAcceptanceTest.java`. + +### Using gradle to run tests +All commands should be run from Airbyte project root. +To run unit tests: +``` +./gradlew :airbyte-integrations:connectors:destination-mqtt:unitTest +``` +To run acceptance and custom integration tests: +``` +./gradlew :airbyte-integrations:connectors:destination-mqtt:integrationTest +``` + +## Dependency Management + +### Publishing a new version of the connector +You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what? +1. Make sure your changes are passing our test suite: `airbyte-ci connectors --name=destination-mqtt test` +2. Bump the connector version in `metadata.yaml`: increment the `dockerImageTag` value. Please follow [semantic versioning for connectors](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#semantic-versioning-for-connectors). +3. Make sure the `metadata.yaml` content is up to date. +4. Make the connector documentation and its changelog is up to date (`docs/integrations/destinations/mqtt.md`). +5. Create a Pull Request: use [our PR naming conventions](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#pull-request-title-convention). +6. Pat yourself on the back for being an awesome contributor. +7. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master. 
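+
+For context on the payload format, the connection check in `MqttDestination.java` (later in this diff) publishes a test message to the configured test topic as a JSON document keyed by the Airbyte column constants. A sketch of that test payload (the UUID and timestamp values are illustrative) looks like:
+```json
+{
+  "_airbyte_ab_id": "e9c9a6d1-0000-0000-0000-000000000000",
+  "_airbyte_stream": "test-topic-stream",
+  "_airbyte_emitted_at": 1626172757000,
+  "_airbyte_data": { "test-key": "test-value" }
+}
+```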
+ diff --git a/airbyte-integrations/connectors/destination-mqtt/build.gradle b/airbyte-integrations/connectors/destination-mqtt/build.gradle new file mode 100644 index 0000000..599b538 --- /dev/null +++ b/airbyte-integrations/connectors/destination-mqtt/build.gradle @@ -0,0 +1,31 @@ +plugins { + id 'application' + id 'airbyte-java-connector' +} + +airbyteJavaConnector { + cdkVersionRequired = '0.2.0' + features = ['db-destinations'] + useLocalCdk = false +} + +//remove once upgrading the CDK version to 0.4.x or later +java { + compileJava { + options.compilerArgs.remove("-Werror") + } +} + +airbyteJavaConnector.addCdkDependencies() + +application { + mainClass = 'io.airbyte.integrations.destination.mqtt.MqttDestination' + applicationDefaultJvmArgs = ['-XX:+ExitOnOutOfMemoryError', '-XX:MaxRAMPercentage=75.0'] +} + +dependencies { + + implementation 'org.eclipse.paho:org.eclipse.paho.client.mqttv3:1.2.5' + + testImplementation 'com.hivemq:hivemq-testcontainer-junit5:2.0.0' +} diff --git a/airbyte-integrations/connectors/destination-mqtt/icon.svg b/airbyte-integrations/connectors/destination-mqtt/icon.svg new file mode 100644 index 0000000..fc95188 --- /dev/null +++ b/airbyte-integrations/connectors/destination-mqtt/icon.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/airbyte-integrations/connectors/destination-mqtt/metadata.yaml b/airbyte-integrations/connectors/destination-mqtt/metadata.yaml new file mode 100644 index 0000000..b99536f --- /dev/null +++ b/airbyte-integrations/connectors/destination-mqtt/metadata.yaml @@ -0,0 +1,24 @@ +data: + connectorSubtype: message_queue + connectorType: destination + definitionId: f3802bc4-5406-4752-9e8d-01e504ca8194 + dockerImageTag: 0.1.3 + dockerRepository: airbyte/destination-mqtt + githubIssueLabel: destination-mqtt + icon: mqtt.svg + license: MIT + name: MQTT + registries: + cloud: + enabled: false + oss: + enabled: false + releaseStage: alpha + documentationUrl: https://docs.airbyte.com/integrations/destinations/mqtt + tags: + - language:java + ab_internal: + sl: 100 + ql: 100 + supportLevel: archived +metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/destination-mqtt/src/main/java/io/airbyte/integrations/destination/mqtt/MqttDestination.java b/airbyte-integrations/connectors/destination-mqtt/src/main/java/io/airbyte/integrations/destination/mqtt/MqttDestination.java new file mode 100644 index 0000000..179ddfb --- /dev/null +++ b/airbyte-integrations/connectors/destination-mqtt/src/main/java/io/airbyte/integrations/destination/mqtt/MqttDestination.java @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.mqtt; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.base.Charsets; +import com.google.common.collect.ImmutableMap; +import io.airbyte.cdk.integrations.BaseConnector; +import io.airbyte.cdk.integrations.base.AirbyteMessageConsumer; +import io.airbyte.cdk.integrations.base.Destination; +import io.airbyte.cdk.integrations.base.IntegrationRunner; +import io.airbyte.cdk.integrations.base.JavaBaseConstants; +import io.airbyte.commons.json.Jsons; +import io.airbyte.protocol.models.v0.AirbyteConnectionStatus; +import io.airbyte.protocol.models.v0.AirbyteConnectionStatus.Status; +import io.airbyte.protocol.models.v0.AirbyteMessage; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import java.util.UUID; +import java.util.function.Consumer; +import org.eclipse.paho.client.mqttv3.IMqttAsyncClient; +import org.eclipse.paho.client.mqttv3.MqttAsyncClient; +import org.eclipse.paho.client.mqttv3.MqttMessage; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class MqttDestination extends BaseConnector implements Destination { + + private static final Logger LOGGER = LoggerFactory.getLogger(MqttDestination.class); + + public static final String COLUMN_NAME_AB_ID = JavaBaseConstants.COLUMN_NAME_AB_ID; + public static final String COLUMN_NAME_EMITTED_AT = JavaBaseConstants.COLUMN_NAME_EMITTED_AT; + public static final String COLUMN_NAME_DATA = JavaBaseConstants.COLUMN_NAME_DATA; + public static final String COLUMN_NAME_STREAM = "_airbyte_stream"; + + @Override + public AirbyteConnectionStatus check(final JsonNode config) { + try { + final MqttDestinationConfig mqttConfig = MqttDestinationConfig.getMqttDestinationConfig(config); + final String testTopic = mqttConfig.getTestTopic(); + if (!testTopic.isBlank()) { + try (final IMqttAsyncClient client = new MqttAsyncClient(mqttConfig.getServerUri(), mqttConfig.getClientId())) { + client.connect(mqttConfig.getMqttConnectOptions()).waitForCompletion(); + + final String key = UUID.randomUUID().toString(); + final JsonNode payload = Jsons.jsonNode(ImmutableMap.of( + COLUMN_NAME_AB_ID, key, + COLUMN_NAME_STREAM, "test-topic-stream", + COLUMN_NAME_EMITTED_AT, System.currentTimeMillis(), + COLUMN_NAME_DATA, Jsons.jsonNode(ImmutableMap.of("test-key", "test-value")))); + + final MqttMessage message = new MqttMessage(payload.toString().getBytes(Charsets.UTF_8)); + message.setQos(mqttConfig.getQos()); + message.setRetained(mqttConfig.isRetainedMessage()); + + client.publish(testTopic, message).getMessage(); + client.disconnectForcibly(); + + LOGGER.info("Successfully sent message with key '{}' to MQTT broker for topic '{}'.", key, testTopic); + } + } + return new AirbyteConnectionStatus().withStatus(Status.SUCCEEDED); + } catch (final Exception e) { + LOGGER.error("Exception attempting to connect to the MQTT broker: ", e); + return new AirbyteConnectionStatus() + .withStatus(Status.FAILED) + .withMessage("Could not connect to the MQTT broker with provided configuration. 
\n" + e.getMessage()); + } + } + + @Override + public AirbyteMessageConsumer getConsumer(final JsonNode config, + final ConfiguredAirbyteCatalog catalog, + final Consumer outputRecordCollector) { + return new MqttRecordConsumer(MqttDestinationConfig.getMqttDestinationConfig(config), + catalog, + outputRecordCollector); + } + + public static void main(final String[] args) throws Exception { + final Destination destination = new MqttDestination(); + LOGGER.info("Starting destination: {}", MqttDestination.class); + new IntegrationRunner(destination).run(args); + LOGGER.info("Completed destination: {}", MqttDestination.class); + } + +} diff --git a/airbyte-integrations/connectors/destination-mqtt/src/main/java/io/airbyte/integrations/destination/mqtt/MqttDestinationConfig.java b/airbyte-integrations/connectors/destination-mqtt/src/main/java/io/airbyte/integrations/destination/mqtt/MqttDestinationConfig.java new file mode 100644 index 0000000..d3efb68 --- /dev/null +++ b/airbyte-integrations/connectors/destination-mqtt/src/main/java/io/airbyte/integrations/destination/mqtt/MqttDestinationConfig.java @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.mqtt; + +import com.fasterxml.jackson.databind.JsonNode; +import java.util.UUID; +import org.eclipse.paho.client.mqttv3.MqttConnectOptions; + +public class MqttDestinationConfig { + + private final String clientId; + private final String serverUri; + private final String topicPattern; + private final String testTopic; + private final MqttConnectOptions options; + private final boolean retained; + private final boolean sync; + private final int qOs; + + private MqttDestinationConfig(final JsonNode config) { + this.clientId = buildClientId(config); + this.serverUri = buildServerUri(config); + this.testTopic = buildTestTopic(config); + this.topicPattern = buildTopicPattern(config); + this.options = buildMqttConnectOptions(config); + this.retained = isRetained(config); + this.sync = isSyncProducer(config); + this.qOs = buildQos(config); + } + + public static MqttDestinationConfig getMqttDestinationConfig(final JsonNode config) { + return new MqttDestinationConfig(config); + } + + public String getClientId() { + return clientId; + } + + public int getQos() { + return qOs; + } + + public MqttConnectOptions getMqttConnectOptions() { + return options; + } + + public String getServerUri() { + return serverUri; + } + + public String getTestTopic() { + return testTopic; + } + + public String getTopicPattern() { + return topicPattern; + } + + public boolean isSync() { + return sync; + } + + public boolean isRetainedMessage() { + return retained; + } + + private String buildClientId(final JsonNode config) { + if (config.has("client_id")) { + return config.get("client_id").asText(); + } + return "airbyte-" + UUID.randomUUID(); + } + + private MqttConnectOptions buildMqttConnectOptions(final JsonNode config) { + final MqttConnectOptions options = new MqttConnectOptions(); + options.setConnectionTimeout(config.get("connect_timeout").intValue()); + options.setAutomaticReconnect(config.get("automatic_reconnect").booleanValue()); + options.setCleanSession(config.get("clean_session").booleanValue()); + if (config.has("username") && !config.get("username").asText().isBlank()) { + options.setUserName(config.get("username").asText()); + } + if (config.has("password") && !config.get("password").asText().isBlank()) { + options.setPassword(config.get("password").asText().toCharArray()); + } + + 
if (config.has("max_in_flight") && !config.get("max_in_flight").asText().isBlank()) { + options.setMaxInflight(config.get("max_in_flight").asInt()); + } + + return options; + } + + private String buildServerUri(final JsonNode config) { + return String.format("%s://%s:%s", + config.get("use_tls").asBoolean() ? "ssl" : "tcp", + config.get("broker_host").asText(), + config.get("broker_port").intValue()); + } + + private String buildTestTopic(final JsonNode config) { + return config.has("test_topic") ? config.get("test_topic").asText() : ""; + } + + private String buildTopicPattern(final JsonNode config) { + return config.get("topic_pattern").asText(); + } + + private boolean isRetained(final JsonNode config) { + return config.get("message_retained").asBoolean(); + } + + private boolean isSyncProducer(final JsonNode config) { + return config.get("publisher_sync").asBoolean(); + } + + private int buildQos(final JsonNode config) { + return MessageQoS.valueOf(config.get("message_qos").asText()).getQos(); + } + + private enum MessageQoS { + + AT_MOST_ONCE(0), + AT_LEAST_ONCE(1), + EXACTLY_ONCE(2); + + private final int qos; + + MessageQoS(final int qos) { + this.qos = qos; + } + + public int getQos() { + return qos; + } + + } + +} diff --git a/airbyte-integrations/connectors/destination-mqtt/src/main/java/io/airbyte/integrations/destination/mqtt/MqttRecordConsumer.java b/airbyte-integrations/connectors/destination-mqtt/src/main/java/io/airbyte/integrations/destination/mqtt/MqttRecordConsumer.java new file mode 100644 index 0000000..5c4c3b0 --- /dev/null +++ b/airbyte-integrations/connectors/destination-mqtt/src/main/java/io/airbyte/integrations/destination/mqtt/MqttRecordConsumer.java @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.mqtt; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.base.Charsets; +import com.google.common.collect.ImmutableMap; +import io.airbyte.cdk.integrations.base.FailureTrackingAirbyteMessageConsumer; +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.lang.Exceptions; +import io.airbyte.protocol.models.v0.AirbyteMessage; +import io.airbyte.protocol.models.v0.AirbyteRecordMessage; +import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; +import java.util.UUID; +import java.util.function.Consumer; +import java.util.function.Function; +import java.util.stream.Collectors; +import org.eclipse.paho.client.mqttv3.IMqttActionListener; +import org.eclipse.paho.client.mqttv3.IMqttAsyncClient; +import org.eclipse.paho.client.mqttv3.IMqttDeliveryToken; +import org.eclipse.paho.client.mqttv3.IMqttToken; +import org.eclipse.paho.client.mqttv3.MqttAsyncClient; +import org.eclipse.paho.client.mqttv3.MqttException; +import org.eclipse.paho.client.mqttv3.MqttMessage; +import org.eclipse.paho.client.mqttv3.persist.MemoryPersistence; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class MqttRecordConsumer extends FailureTrackingAirbyteMessageConsumer { + + private static final Logger LOGGER = LoggerFactory.getLogger(MqttRecordConsumer.class); + + private final MqttDestinationConfig config; + private final Map topicMap; + private final ConfiguredAirbyteCatalog catalog; + private final Consumer outputRecordCollector; + private final IMqttAsyncClient client; + + private AirbyteMessage lastStateMessage = null; + + public MqttRecordConsumer(final MqttDestinationConfig mqttDestinationConfig, + final ConfiguredAirbyteCatalog catalog, + final Consumer outputRecordCollector) { + this.config = mqttDestinationConfig; + this.topicMap = new HashMap<>(); + this.catalog = catalog; + this.outputRecordCollector = outputRecordCollector; + this.client = buildMqttClient(); + } + + private IMqttAsyncClient buildMqttClient() { + try { + return new MqttAsyncClient(config.getServerUri(), config.getClientId(), new MemoryPersistence()); + } catch (MqttException e) { + throw new RuntimeException("Error creating MQTT client", e); + } + } + + @Override + protected void startTracked() { + try { + client.connect(config.getMqttConnectOptions()).waitForCompletion(); + } catch (MqttException e) { + throw new RuntimeException("Error connecting to MQTT broker", e); + } + topicMap.putAll(buildTopicMap()); + } + + @Override + protected void acceptTracked(final AirbyteMessage airbyteMessage) { + if (airbyteMessage.getType() == AirbyteMessage.Type.STATE) { + lastStateMessage = airbyteMessage; + } else if (airbyteMessage.getType() == AirbyteMessage.Type.RECORD) { + final AirbyteRecordMessage recordMessage = airbyteMessage.getRecord(); + final String topic = topicMap.get(AirbyteStreamNameNamespacePair.fromRecordMessage(recordMessage)); + + final String key = UUID.randomUUID().toString(); + final JsonNode payload = Jsons.jsonNode(ImmutableMap.of( + MqttDestination.COLUMN_NAME_AB_ID, key, + MqttDestination.COLUMN_NAME_STREAM, recordMessage.getStream(), + MqttDestination.COLUMN_NAME_EMITTED_AT, recordMessage.getEmittedAt(), + MqttDestination.COLUMN_NAME_DATA, recordMessage.getData())); + + final MqttMessage message = new MqttMessage(payload.toString().getBytes(Charsets.UTF_8)); + 
message.setRetained(config.isRetainedMessage()); + message.setQos(config.getQos()); + + sendRecord(topic, message); + } else { + LOGGER.warn("Unexpected message: " + airbyteMessage.getType()); + } + } + + Map buildTopicMap() { + return catalog.getStreams().stream() + .map(stream -> AirbyteStreamNameNamespacePair.fromAirbyteStream(stream.getStream())) + .collect(Collectors.toMap(Function.identity(), pair -> config.getTopicPattern() + .replaceAll("\\{namespace}", Optional.ofNullable(pair.getNamespace()).orElse("")) + .replaceAll("\\{stream}", Optional.ofNullable(pair.getName()).orElse("")), + (existing, newValue) -> existing)); + } + + private void sendRecord(final String topic, final MqttMessage message) { + try { + final IMqttDeliveryToken token = client.publish(topic, message, null, new MessageActionListener(outputRecordCollector, lastStateMessage)); + if (config.isSync()) { + token.waitForCompletion(); + } + } catch (MqttException e) { + LOGGER.error("Error sending message to topic '{}'.", topic, e); + throw new RuntimeException("Cannot send message to MQTT. Error: " + e.getMessage(), e); + } + } + + @Override + protected void close(final boolean hasFailed) { + Exceptions.swallow(client::disconnectForcibly); + Exceptions.swallow(client::close); + + if (lastStateMessage != null) { + outputRecordCollector.accept(lastStateMessage); + } + } + + private static class MessageActionListener implements IMqttActionListener { + + private final AirbyteMessage lastStateMessage; + private final Consumer outputRecordCollector; + + MessageActionListener(Consumer outputRecordCollector, AirbyteMessage lastStateMessage) { + this.outputRecordCollector = outputRecordCollector; + this.lastStateMessage = lastStateMessage; + } + + @Override + public void onSuccess(IMqttToken asyncActionToken) { + if (lastStateMessage != null) { + outputRecordCollector.accept(lastStateMessage); + } + } + + @Override + public void onFailure(IMqttToken asyncActionToken, Throwable exception) { + throw new RuntimeException("Cannot deliver message with ID '" + asyncActionToken.getMessageId() + "'", exception); + } + + } + +} diff --git a/airbyte-integrations/connectors/destination-mqtt/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-mqtt/src/main/resources/spec.json new file mode 100644 index 0000000..64933cb --- /dev/null +++ b/airbyte-integrations/connectors/destination-mqtt/src/main/resources/spec.json @@ -0,0 +1,108 @@ +{ + "documentationUrl": "https://docs.airbyte.com/integrations/destinations/mqtt", + "supportsIncremental": true, + "supportsNormalization": false, + "supportsDBT": false, + "supported_destination_sync_modes": ["append"], + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "MQTT Destination Spec", + "type": "object", + "required": [ + "broker_host", + "broker_port", + "use_tls", + "topic_pattern", + "publisher_sync", + "connect_timeout", + "automatic_reconnect", + "clean_session", + "message_retained", + "message_qos" + ], + "additionalProperties": true, + "properties": { + "broker_host": { + "title": "MQTT broker host", + "description": "Host of the broker to connect to.", + "type": "string" + }, + "broker_port": { + "title": "MQTT broker port", + "description": "Port of the broker.", + "type": "integer" + }, + "use_tls": { + "title": "Use TLS", + "description": "Whether to use TLS encryption on the connection.", + "type": "boolean", + "default": false + }, + "username": { + "title": "Username", + "description": "User name to use for the 
connection.", + "type": "string" + }, + "password": { + "title": "Password", + "description": "Password to use for the connection.", + "type": "string", + "airbyte_secret": true + }, + "topic_pattern": { + "title": "Topic pattern", + "description": "Topic pattern in which the records will be sent. You can use patterns like '{namespace}' and/or '{stream}' to send the message to a specific topic based on these values. Notice that the topic name will be transformed to a standard naming convention.", + "type": "string", + "examples": ["sample.topic", "{namespace}/{stream}/sample"] + }, + "topic_test": { + "title": "Test topic", + "description": "Topic to test if Airbyte can produce messages.", + "type": "string", + "examples": ["test/topic"] + }, + "client": { + "title": "Client ID", + "description": "A client identifier that is unique on the server being connected to.", + "type": "string", + "examples": ["airbyte-client1"] + }, + "publisher_sync": { + "title": "Sync publisher", + "description": "Wait synchronously until the record has been sent to the broker.", + "type": "boolean", + "default": false + }, + "connect_timeout": { + "title": "Connect timeout", + "description": " Maximum time interval (in seconds) the client will wait for the network connection to the MQTT server to be established.", + "type": "integer", + "default": 30 + }, + "automatic_reconnect": { + "title": "Automatic reconnect", + "description": "Whether the client will automatically attempt to reconnect to the server if the connection is lost.", + "type": "boolean", + "default": true + }, + "clean_session": { + "title": "Clean session", + "description": "Whether the client and server should remember state across restarts and reconnects.", + "type": "boolean", + "default": true + }, + "message_retained": { + "title": "Message retained", + "description": "Whether or not the publish message should be retained by the messaging engine.", + "type": "boolean", + "default": false + }, + "message_qos": { + "title": "Message QoS", + "description": "Quality of service used for each message to be delivered.", + "default": "AT_LEAST_ONCE", + "enum": ["AT_MOST_ONCE", "AT_LEAST_ONCE", "EXACTLY_ONCE"] + } + } + } +} diff --git a/airbyte-integrations/connectors/destination-mqtt/src/test-integration/java/io/airbyte/integrations/destination/mqtt/MqttDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-mqtt/src/test-integration/java/io/airbyte/integrations/destination/mqtt/MqttDestinationAcceptanceTest.java new file mode 100644 index 0000000..6cb4ab0 --- /dev/null +++ b/airbyte-integrations/connectors/destination-mqtt/src/test-integration/java/io/airbyte/integrations/destination/mqtt/MqttDestinationAcceptanceTest.java @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.mqtt; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.ObjectReader; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Streams; +import com.google.common.net.InetAddresses; +import com.hivemq.testcontainer.junit5.HiveMQTestContainerExtension; +import io.airbyte.cdk.integrations.standardtest.destination.DestinationAcceptanceTest; +import io.airbyte.cdk.integrations.standardtest.destination.comparator.AdvancedTestDataComparator; +import io.airbyte.cdk.integrations.standardtest.destination.comparator.TestDataComparator; +import io.airbyte.commons.json.Jsons; +import java.net.InetAddress; +import java.net.NetworkInterface; +import java.net.SocketException; +import java.net.UnknownHostException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import org.eclipse.paho.client.mqttv3.MqttClient; +import org.eclipse.paho.client.mqttv3.MqttConnectOptions; +import org.eclipse.paho.client.mqttv3.MqttException; +import org.eclipse.paho.client.mqttv3.persist.MemoryPersistence; +import org.junit.jupiter.api.extension.RegisterExtension; +import org.testcontainers.utility.DockerImageName; + +public class MqttDestinationAcceptanceTest extends DestinationAcceptanceTest { + + private static final String TOPIC_PREFIX = "test/integration/"; + private static final String TOPIC_NAME = "test.topic"; + private static final ObjectReader READER = new ObjectMapper().reader(); + + private final Map> recordsPerTopic = new HashMap<>(); + private MqttClient client; + + @RegisterExtension + public final HiveMQTestContainerExtension extension = new HiveMQTestContainerExtension(DockerImageName.parse("hivemq/hivemq-ce:2021.2")); + + @Override + protected String getImageName() { + return "airbyte/destination-mqtt:dev"; + } + + @Override + protected JsonNode getConfig() throws UnknownHostException { + return Jsons.jsonNode(ImmutableMap.builder() + .put("broker_host", getIpAddress()) + .put("broker_port", extension.getMqttPort()) + .put("use_tls", false) + .put("topic_pattern", TOPIC_PREFIX + "{namespace}/{stream}/" + TOPIC_NAME) + .put("client_id", UUID.randomUUID()) + .put("publisher_sync", true) + .put("connect_timeout", 10) + .put("automatic_reconnect", true) + .put("clean_session", true) + .put("message_retained", false) + .put("message_qos", "EXACTLY_ONCE") + .put("max_in_flight", 1000) + .build()); + } + + @Override + protected JsonNode getFailCheckConfig() { + return Jsons.jsonNode(ImmutableMap.builder() + .put("broker_host", extension.getHost()) + .put("broker_port", extension.getMqttPort()) + .put("topic_pattern", TOPIC_PREFIX + "{namespace}/{stream}/" + TOPIC_NAME) + .put("client_id", UUID.randomUUID()) + .put("publisher_sync", true) + .put("connect_timeout", 10) + .put("automatic_reconnect", true) + .build()); + } + + @Override + protected TestDataComparator getTestDataComparator() { + return new AdvancedTestDataComparator(); + } + + @Override + protected boolean supportBasicDataTypeTest() { + return true; + } + + @Override + protected boolean supportArrayDataTypeTest() { + return true; + } + + @Override + protected boolean supportObjectDataTypeTest() { + return true; + } + + @Override + protected boolean implementsNamespaces() { + return true; + } + + @Override + protected String getDefaultSchema(final 
JsonNode config) { + return ""; + } + + @Override + protected List retrieveNormalizedRecords(final TestDestinationEnv testEnv, final String streamName, final String namespace) { + return retrieveRecords(testEnv, streamName, namespace, null); + } + + @Override + protected List retrieveRecords(final TestDestinationEnv testEnv, + final String streamName, + final String namespace, + final JsonNode streamSchema) { + final String topic = TOPIC_PREFIX + namespace + "/" + streamName + "/" + TOPIC_NAME; + return recordsPerTopic.getOrDefault(topic, Collections.emptyList()); + } + + @SuppressWarnings("UnstableApiUsage") + private String getIpAddress() throws UnknownHostException { + try { + return Streams.stream(NetworkInterface.getNetworkInterfaces().asIterator()) + .flatMap(ni -> Streams.stream(ni.getInetAddresses().asIterator())) + .filter(add -> !add.isLoopbackAddress()) + .map(InetAddress::getHostAddress) + .filter(InetAddresses::isUriInetAddress) + .findFirst().orElse(InetAddress.getLocalHost().getHostAddress()); + } catch (final SocketException e) { + return InetAddress.getLocalHost().getHostAddress(); + } + } + + @Override + protected void setup(final TestDestinationEnv testEnv, final HashSet TEST_SCHEMAS) throws MqttException { + recordsPerTopic.clear(); + client = new MqttClient("tcp://" + extension.getHost() + ":" + extension.getMqttPort(), UUID.randomUUID().toString(), new MemoryPersistence()); + + final MqttConnectOptions options = new MqttConnectOptions(); + options.setAutomaticReconnect(true); + + client.connect(options); + + client.subscribe(TOPIC_PREFIX + "#", (topic, msg) -> { + final List records = recordsPerTopic.getOrDefault(topic, new ArrayList<>()); + records.add(READER.readTree(msg.getPayload()).get(MqttDestination.COLUMN_NAME_DATA)); + recordsPerTopic.put(topic, records); + }); + } + + @Override + protected void tearDown(final TestDestinationEnv testEnv) throws MqttException { + client.disconnectForcibly(); + client.close(); + } + +} diff --git a/airbyte-integrations/connectors/destination-mqtt/src/test/java/io/airbyte/integrations/destination/mqtt/MqttRecordConsumerTest.java b/airbyte-integrations/connectors/destination-mqtt/src/test/java/io/airbyte/integrations/destination/mqtt/MqttRecordConsumerTest.java new file mode 100644 index 0000000..c3ad236 --- /dev/null +++ b/airbyte-integrations/connectors/destination-mqtt/src/test/java/io/airbyte/integrations/destination/mqtt/MqttRecordConsumerTest.java @@ -0,0 +1,175 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.mqtt; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.mockito.Mockito.mock; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Sets; +import com.hivemq.testcontainer.junit5.HiveMQTestContainerExtension; +import io.airbyte.commons.json.Jsons; +import io.airbyte.protocol.models.Field; +import io.airbyte.protocol.models.JsonSchemaType; +import io.airbyte.protocol.models.v0.AirbyteMessage; +import io.airbyte.protocol.models.v0.AirbyteRecordMessage; +import io.airbyte.protocol.models.v0.AirbyteStateMessage; +import io.airbyte.protocol.models.v0.AirbyteStream; +import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair; +import io.airbyte.protocol.models.v0.CatalogHelpers; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream; +import java.time.Instant; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.function.Consumer; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import java.util.stream.Stream; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtensionContext; +import org.junit.jupiter.api.extension.RegisterExtension; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.ArgumentsProvider; +import org.junit.jupiter.params.provider.ArgumentsSource; +import org.testcontainers.utility.DockerImageName; + +@DisplayName("MqttRecordConsumer") +public class MqttRecordConsumerTest { + + @RegisterExtension + public final HiveMQTestContainerExtension extension = new HiveMQTestContainerExtension(DockerImageName.parse("hivemq/hivemq-ce:2021.2")); + + @ParameterizedTest + @ArgumentsSource(TopicMapArgumentsProvider.class) + @SuppressWarnings("unchecked") + public void testBuildTopicMap(final ConfiguredAirbyteCatalog catalog, + final String streamName, + final String namespace, + final String topicPattern, + final String expectedTopic) { + final MqttDestinationConfig config = MqttDestinationConfig + .getMqttDestinationConfig(getConfig(extension.getHost(), extension.getMqttPort(), topicPattern)); + + final MqttRecordConsumer recordConsumer = new MqttRecordConsumer(config, catalog, mock(Consumer.class)); + final Map topicMap = recordConsumer.buildTopicMap(); + assertEquals(Sets.newHashSet(catalog.getStreams()).size(), topicMap.size()); + + final AirbyteStreamNameNamespacePair streamNameNamespacePair = new AirbyteStreamNameNamespacePair(streamName, namespace); + assertEquals(expectedTopic, topicMap.get(streamNameNamespacePair)); + } + + @Test + @SuppressWarnings("unchecked") + void testCannotConnectToBrokers() throws Exception { + final MqttDestinationConfig config = MqttDestinationConfig + .getMqttDestinationConfig(getConfig(extension.getHost(), extension.getMqttPort() + 10, "test-topic")); + + final String streamName = "test-stream"; + final String namespace = "test-schema"; + final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog().withStreams(List.of( + CatalogHelpers.createConfiguredAirbyteStream( + streamName, + namespace, + Field.of("id", JsonSchemaType.NUMBER), + 
Field.of("name", JsonSchemaType.STRING)))); + final MqttRecordConsumer consumer = new MqttRecordConsumer(config, catalog, mock(Consumer.class)); + final List expectedRecords = getNRecords(10, streamName, namespace); + + assertThrows(RuntimeException.class, consumer::start); + + expectedRecords.forEach(m -> assertThrows(RuntimeException.class, () -> consumer.accept(m))); + + consumer.accept(new AirbyteMessage() + .withType(AirbyteMessage.Type.STATE) + .withState(new AirbyteStateMessage().withData(Jsons.jsonNode(ImmutableMap.of(namespace + "." + streamName, 0))))); + consumer.close(); + } + + private JsonNode getConfig(final String broker, final int port, final String topic) { + return Jsons.jsonNode(ImmutableMap.builder() + .put("broker_host", broker) + .put("broker_port", port) + .put("use_tls", false) + .put("topic_pattern", topic) + .put("publisher_sync", true) + .put("connect_timeout", 10) + .put("automatic_reconnect", false) + .put("clean_session", true) + .put("message_retained", true) + .put("message_qos", "EXACTLY_ONCE") + .build()); + } + + private List getNRecords(final int n, final String streamName, final String namespace) { + return IntStream.range(0, n) + .boxed() + .map(i -> new AirbyteMessage() + .withType(AirbyteMessage.Type.RECORD) + .withRecord(new AirbyteRecordMessage() + .withStream(streamName) + .withNamespace(namespace) + .withEmittedAt(Instant.now().toEpochMilli()) + .withData(Jsons.jsonNode(ImmutableMap.of("id", i, "name", "human " + i))))) + .collect(Collectors.toList()); + + } + + public static class TopicMapArgumentsProvider implements ArgumentsProvider { + + private static final String TOPIC_NAME = "test.topic"; + private static final String SCHEMA_NAME1 = "public"; + private static final String STREAM_NAME1 = "id_and_name"; + private static final String SCHEMA_NAME2 = SCHEMA_NAME1 + 2; + private static final String STREAM_NAME2 = STREAM_NAME1 + 2; + + private final ConfiguredAirbyteStream stream1 = CatalogHelpers.createConfiguredAirbyteStream( + SCHEMA_NAME1, + STREAM_NAME1, + Field.of("id", JsonSchemaType.NUMBER), + Field.of("name", JsonSchemaType.STRING)); + private final ConfiguredAirbyteStream stream2 = CatalogHelpers.createConfiguredAirbyteStream( + SCHEMA_NAME2, + STREAM_NAME2, + Field.of("id", JsonSchemaType.NUMBER), + Field.of("name", JsonSchemaType.STRING)); + + @Override + public Stream provideArguments(final ExtensionContext context) { + final List catalogs = new ArrayList<>(); + catalogs.add(new ConfiguredAirbyteCatalog().withStreams(List.of(stream1))); + catalogs.add(new ConfiguredAirbyteCatalog().withStreams(List.of(stream1, stream1))); + catalogs.add(new ConfiguredAirbyteCatalog().withStreams(List.of(stream1, stream2))); + + return catalogs.stream() + .flatMap(catalog -> catalog.getStreams().stream() + .map(stream -> buildArgs(catalog, stream.getStream())) + .flatMap(Collection::stream)); + } + + private List buildArgs(final ConfiguredAirbyteCatalog catalog, final AirbyteStream stream) { + return ImmutableList.of( + Arguments.of(catalog, stream.getName(), stream.getNamespace(), TOPIC_NAME, TOPIC_NAME), + Arguments.of(catalog, stream.getName(), stream.getNamespace(), "test-topic", "test-topic"), + Arguments.of(catalog, stream.getName(), stream.getNamespace(), "{namespace}", stream.getNamespace()), + Arguments.of(catalog, stream.getName(), stream.getNamespace(), "{stream}", stream.getName()), + Arguments.of(catalog, stream.getName(), stream.getNamespace(), "{namespace}.{stream}." + TOPIC_NAME, + stream.getNamespace() + "." + stream.getName() + "." 
+ TOPIC_NAME),
+          Arguments.of(catalog, stream.getName(), stream.getNamespace(), "{namespace}-{stream}-" + TOPIC_NAME,
+              stream.getNamespace() + "-" + stream.getName() + "-" + TOPIC_NAME),
+          Arguments.of(catalog, stream.getName(), stream.getNamespace(), "topic with spaces", "topic with spaces"),
+          Arguments.of(catalog, stream.getName(), stream.getNamespace(), "UppercaseTopic/test", "UppercaseTopic/test"));
+    }
+
+  }
+
+}
diff --git a/airbyte-integrations/connectors/destination-pulsar/README.md b/airbyte-integrations/connectors/destination-pulsar/README.md
new file mode 100644
index 0000000..f2c554b
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-pulsar/README.md
@@ -0,0 +1,72 @@
+# Destination Pulsar
+
+This is the repository for the Pulsar destination connector in Java.
+For information about how to use this connector within Airbyte, see [the User Documentation](https://docs.airbyte.io/integrations/destinations/pulsar).
+
+## Local development
+
+#### Building via Gradle
+From the Airbyte repository root, run:
+```
+./gradlew :airbyte-integrations:connectors:destination-pulsar:build
+```
+
+#### Create credentials
+**If you are a community contributor**, generate the necessary credentials and place them in `secrets/config.json` conforming to the spec file in `src/main/resources/spec.json`.
+Note that the `secrets` directory is git-ignored by default, so there is no danger of accidentally checking in sensitive information.
+
+**If you are an Airbyte core member**, follow the [instructions](https://docs.airbyte.io/connector-development#using-credentials-in-ci) to set up the credentials.
+
+### Locally running the connector docker image
+
+#### Build
+Build the connector image via Gradle:
+
+```
+./gradlew :airbyte-integrations:connectors:destination-pulsar:buildConnectorImage
+```
+Once built, the docker image name and tag on your host will be `airbyte/destination-pulsar:dev`.
+
+#### Run
+Then run any of the connector commands as follows:
+```
+docker run --rm airbyte/destination-pulsar:dev spec
+docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-pulsar:dev check --config /secrets/config.json
+docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-pulsar:dev discover --config /secrets/config.json
+docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/destination-pulsar:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json
+```
+
+## Testing
+We use `JUnit` for Java tests.
+
+### Unit and Integration Tests
+Place unit tests under `src/test/io/airbyte/integrations/destinations/pulsar`.
+
+#### Acceptance Tests
+Airbyte has a standard test suite that all destination connectors must pass. Implement the `TODO`s in
+`src/test-integration/java/io/airbyte/integrations/destinations/PulsarDestinationAcceptanceTest.java`.
+
+### Using gradle to run tests
+All commands should be run from the Airbyte project root.
+To run unit tests:
+```
+./gradlew :airbyte-integrations:connectors:destination-pulsar:unitTest
+```
+To run acceptance and custom integration tests:
+```
+./gradlew :airbyte-integrations:connectors:destination-pulsar:integrationTest
+```
+
+## Dependency Management
+
+### Publishing a new version of the connector
+You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what?
+1. Make sure your changes are passing our test suite: `airbyte-ci connectors --name=destination-pulsar test`
+2. Bump the connector version in `metadata.yaml`: increment the `dockerImageTag` value. Please follow [semantic versioning for connectors](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#semantic-versioning-for-connectors).
+3. Make sure the `metadata.yaml` content is up to date.
+4. Make sure the connector documentation and its changelog are up to date (`docs/integrations/destinations/pulsar.md`).
+5. Create a Pull Request: use [our PR naming conventions](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#pull-request-title-convention).
+6. Pat yourself on the back for being an awesome contributor.
+7. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master.
+
diff --git a/airbyte-integrations/connectors/destination-pulsar/build.gradle b/airbyte-integrations/connectors/destination-pulsar/build.gradle
new file mode 100644
index 0000000..fad585d
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-pulsar/build.gradle
@@ -0,0 +1,31 @@
+plugins {
+    id 'application'
+    id 'airbyte-java-connector'
+}
+
+airbyteJavaConnector {
+    cdkVersionRequired = '0.2.0'
+    features = ['db-destinations']
+    useLocalCdk = false
+}
+
+//remove once upgrading the CDK version to 0.4.x or later
+java {
+    compileJava {
+        options.compilerArgs.remove("-Werror")
+    }
+}
+
+airbyteJavaConnector.addCdkDependencies()
+
+application {
+    mainClass = 'io.airbyte.integrations.destination.pulsar.PulsarDestination'
+    applicationDefaultJvmArgs = ['-XX:+ExitOnOutOfMemoryError', '-XX:MaxRAMPercentage=75.0']
+}
+
+dependencies {
+
+    implementation 'org.apache.pulsar:pulsar-client:2.8.1'
+
+    testImplementation libs.testcontainers.pulsar
+}
diff --git a/airbyte-integrations/connectors/destination-pulsar/icon.svg b/airbyte-integrations/connectors/destination-pulsar/icon.svg
new file mode 100644
index 0000000..dacd88f
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-pulsar/icon.svg
@@ -0,0 +1 @@
+ 
\ No newline at end of file
diff --git a/airbyte-integrations/connectors/destination-pulsar/metadata.yaml b/airbyte-integrations/connectors/destination-pulsar/metadata.yaml
new file mode 100644
index 0000000..a83610b
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-pulsar/metadata.yaml
@@ -0,0 +1,24 @@
+data:
+  connectorSubtype: database
+  connectorType: destination
+  definitionId: 2340cbba-358e-11ec-8d3d-0242ac130203
+  dockerImageTag: 0.1.3
+  dockerRepository: airbyte/destination-pulsar
+  githubIssueLabel: destination-pulsar
+  icon: pulsar.svg
+  license: MIT
+  name: Pulsar
+  registries:
+    cloud:
+      enabled: false
+    oss:
+      enabled: false
+  releaseStage: alpha
+  documentationUrl: https://docs.airbyte.com/integrations/destinations/pulsar
+  tags:
+    - language:java
+  ab_internal:
+    sl: 100
+    ql: 100
+  supportLevel: archived
+metadataSpecVersion: "1.0"
diff --git a/airbyte-integrations/connectors/destination-pulsar/src/main/java/io/airbyte/integrations/destination/pulsar/PulsarDestination.java b/airbyte-integrations/connectors/destination-pulsar/src/main/java/io/airbyte/integrations/destination/pulsar/PulsarDestination.java
new file mode 100644
index 0000000..79e5e72
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-pulsar/src/main/java/io/airbyte/integrations/destination/pulsar/PulsarDestination.java
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */ + +package io.airbyte.integrations.destination.pulsar; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import io.airbyte.cdk.integrations.BaseConnector; +import io.airbyte.cdk.integrations.base.AirbyteMessageConsumer; +import io.airbyte.cdk.integrations.base.Destination; +import io.airbyte.cdk.integrations.base.IntegrationRunner; +import io.airbyte.cdk.integrations.base.JavaBaseConstants; +import io.airbyte.cdk.integrations.destination.StandardNameTransformer; +import io.airbyte.commons.json.Jsons; +import io.airbyte.protocol.models.v0.AirbyteConnectionStatus; +import io.airbyte.protocol.models.v0.AirbyteConnectionStatus.Status; +import io.airbyte.protocol.models.v0.AirbyteMessage; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import java.util.UUID; +import java.util.function.Consumer; +import org.apache.pulsar.client.api.MessageId; +import org.apache.pulsar.client.api.Producer; +import org.apache.pulsar.client.api.PulsarClient; +import org.apache.pulsar.client.api.Schema; +import org.apache.pulsar.client.api.schema.GenericRecord; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class PulsarDestination extends BaseConnector implements Destination { + + private static final Logger LOGGER = LoggerFactory.getLogger(PulsarDestination.class); + + public static final String COLUMN_NAME_AB_ID = JavaBaseConstants.COLUMN_NAME_AB_ID; + public static final String COLUMN_NAME_EMITTED_AT = JavaBaseConstants.COLUMN_NAME_EMITTED_AT; + public static final String COLUMN_NAME_DATA = JavaBaseConstants.COLUMN_NAME_DATA; + public static final String COLUMN_NAME_STREAM = "_airbyte_stream"; + + private final StandardNameTransformer namingResolver; + + public PulsarDestination() { + this.namingResolver = new StandardNameTransformer(); + } + + @Override + public AirbyteConnectionStatus check(final JsonNode config) { + try { + final PulsarDestinationConfig pulsarConfig = PulsarDestinationConfig.getPulsarDestinationConfig(config); + final String testTopic = pulsarConfig.getTestTopic(); + if (!testTopic.isBlank()) { + final String key = UUID.randomUUID().toString(); + final GenericRecord value = Schema.generic(PulsarDestinationConfig.getSchemaInfo()) + .newRecordBuilder() + .set(PulsarDestination.COLUMN_NAME_AB_ID, key) + .set(PulsarDestination.COLUMN_NAME_STREAM, "test-topic-stream") + .set(PulsarDestination.COLUMN_NAME_EMITTED_AT, System.currentTimeMillis()) + .set(PulsarDestination.COLUMN_NAME_DATA, Jsons.jsonNode(ImmutableMap.of("test-key", "test-value"))) + .build(); + + try (final PulsarClient client = PulsarUtils.buildClient(pulsarConfig.getServiceUrl()); + final Producer producer = PulsarUtils.buildProducer(client, Schema.generic(PulsarDestinationConfig.getSchemaInfo()), + pulsarConfig.getProducerConfig(), pulsarConfig.uriForTopic(testTopic))) { + final MessageId messageId = producer.send(value); + + producer.flush(); + + LOGGER.info("Successfully sent message id '{}' to Pulsar brokers for topic '{}'.", messageId, testTopic); + } + } + return new AirbyteConnectionStatus().withStatus(Status.SUCCEEDED); + } catch (final Exception e) { + LOGGER.error("Exception attempting to connect to the Pulsar brokers: ", e); + return new AirbyteConnectionStatus() + .withStatus(Status.FAILED) + .withMessage("Could not connect to the Pulsar brokers with provided configuration. 
\n" + e.getMessage()); + } + } + + @Override + public AirbyteMessageConsumer getConsumer(final JsonNode config, + final ConfiguredAirbyteCatalog catalog, + final Consumer outputRecordCollector) { + final PulsarDestinationConfig pulsarConfig = PulsarDestinationConfig.getPulsarDestinationConfig(config); + return new PulsarRecordConsumer(pulsarConfig, + catalog, + PulsarUtils.buildClient(pulsarConfig.getServiceUrl()), + outputRecordCollector, + namingResolver); + } + + public static void main(final String[] args) throws Exception { + final Destination destination = new PulsarDestination(); + LOGGER.info("Starting destination: {}", PulsarDestination.class); + new IntegrationRunner(destination).run(args); + LOGGER.info("Completed destination: {}", PulsarDestination.class); + } + +} diff --git a/airbyte-integrations/connectors/destination-pulsar/src/main/java/io/airbyte/integrations/destination/pulsar/PulsarDestinationConfig.java b/airbyte-integrations/connectors/destination-pulsar/src/main/java/io/airbyte/integrations/destination/pulsar/PulsarDestinationConfig.java new file mode 100644 index 0000000..39dd6b0 --- /dev/null +++ b/airbyte-integrations/connectors/destination-pulsar/src/main/java/io/airbyte/integrations/destination/pulsar/PulsarDestinationConfig.java @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.pulsar; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import java.util.Map; +import org.apache.pulsar.client.api.CompressionType; +import org.apache.pulsar.client.api.schema.RecordSchemaBuilder; +import org.apache.pulsar.client.api.schema.SchemaBuilder; +import org.apache.pulsar.common.schema.SchemaInfo; +import org.apache.pulsar.common.schema.SchemaType; + +public class PulsarDestinationConfig { + + private final String serviceUrl; + private final String topicPattern; + private final String topicPrefix; + private final String testTopic; + private final Map producerConfig; + private final boolean sync; + + private PulsarDestinationConfig(final JsonNode config) { + this.serviceUrl = buildServiceUrl(config); + this.topicPattern = buildTopicPattern(config); + this.topicPrefix = buildTopicPrefix(config); + this.testTopic = buildTestTopic(config); + this.producerConfig = buildProducerConfig(config); + this.sync = isSyncProducer(config); + } + + public static PulsarDestinationConfig getPulsarDestinationConfig(final JsonNode config) { + return new PulsarDestinationConfig(config); + } + + public Map getProducerConfig() { + return producerConfig; + } + + public String getServiceUrl() { + return serviceUrl; + } + + public static SchemaInfo getSchemaInfo() { + RecordSchemaBuilder recordSchemaBuilder = SchemaBuilder.record("airbyte"); + recordSchemaBuilder.field(PulsarDestination.COLUMN_NAME_AB_ID).type(SchemaType.STRING).required(); + recordSchemaBuilder.field(PulsarDestination.COLUMN_NAME_STREAM).type(SchemaType.STRING).required(); + recordSchemaBuilder.field(PulsarDestination.COLUMN_NAME_EMITTED_AT).type(SchemaType.TIMESTAMP).required(); + recordSchemaBuilder.field(PulsarDestination.COLUMN_NAME_DATA).type(SchemaType.BYTES).required(); + + return recordSchemaBuilder.build(SchemaType.JSON); + } + + public String uriForTopic(final String topic) { + return topicPrefix + topic; + } + + public String getTestTopic() { + return testTopic; + } + + public String getTopicPattern() { + return topicPattern; + } + + public boolean isSync() { + return sync; + } + + private 
String buildServiceUrl(final JsonNode config) { + return String.format("pulsar%s://%s", + config.get("use_tls").asBoolean() ? "+ssl" : "", + config.get("brokers").asText()); + } + + private String buildTestTopic(final JsonNode config) { + return config.has("test_topic") ? config.get("test_topic").asText() : ""; + } + + private String buildTopicPattern(final JsonNode config) { + return config.get("topic_pattern").asText(); + } + + private String buildTopicPrefix(final JsonNode config) { + return String.format("%s://%s/%s/", + config.get("topic_type").asText(), + config.get("topic_tenant").asText(), + config.get("topic_namespace").asText()); + } + + private Map buildProducerConfig(final JsonNode config) { + final ImmutableMap.Builder conf = ImmutableMap.builder(); + if (config.has("producer_name")) { + conf.put("producerName", config.get("producer_name").asText()); + } + conf.put("compressionType", CompressionType.valueOf(config.get("compression_type").asText())); + conf.put("sendTimeoutMs", config.get("send_timeout_ms").asInt()); + conf.put("maxPendingMessages", config.get("max_pending_messages").asInt()); + conf.put("maxPendingMessagesAcrossPartitions", config.get("max_pending_messages_across_partitions").asInt()); + conf.put("batchingEnabled", config.get("batching_enabled").asBoolean()); + conf.put("batchingMaxMessages", config.get("batching_max_messages").asInt()); + conf.put("batchingMaxPublishDelayMicros", config.get("batching_max_publish_delay").asInt() * 1000); + conf.put("blockIfQueueFull", config.get("block_if_queue_full").asBoolean()); + + return conf.build(); + } + + private boolean isSyncProducer(final JsonNode config) { + return config.has("producer_sync") && config.get("producer_sync").asBoolean(); + } + +} diff --git a/airbyte-integrations/connectors/destination-pulsar/src/main/java/io/airbyte/integrations/destination/pulsar/PulsarRecordConsumer.java b/airbyte-integrations/connectors/destination-pulsar/src/main/java/io/airbyte/integrations/destination/pulsar/PulsarRecordConsumer.java new file mode 100644 index 0000000..1ccd22c --- /dev/null +++ b/airbyte-integrations/connectors/destination-pulsar/src/main/java/io/airbyte/integrations/destination/pulsar/PulsarRecordConsumer.java @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.pulsar; + +import io.airbyte.cdk.integrations.base.FailureTrackingAirbyteMessageConsumer; +import io.airbyte.cdk.integrations.destination.NamingConventionTransformer; +import io.airbyte.commons.lang.Exceptions; +import io.airbyte.protocol.models.v0.AirbyteMessage; +import io.airbyte.protocol.models.v0.AirbyteRecordMessage; +import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import java.nio.charset.StandardCharsets; +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; +import java.util.UUID; +import java.util.function.Consumer; +import java.util.function.Function; +import java.util.stream.Collectors; +import org.apache.pulsar.client.api.Producer; +import org.apache.pulsar.client.api.PulsarClient; +import org.apache.pulsar.client.api.PulsarClientException; +import org.apache.pulsar.client.api.Schema; +import org.apache.pulsar.client.api.schema.GenericRecord; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class PulsarRecordConsumer extends FailureTrackingAirbyteMessageConsumer { + + private static final Logger LOGGER = LoggerFactory.getLogger(PulsarRecordConsumer.class); + + private final PulsarDestinationConfig config; + private final Map> producerMap; + private final ConfiguredAirbyteCatalog catalog; + private final Consumer outputRecordCollector; + private final NamingConventionTransformer nameTransformer; + private final PulsarClient client; + + public PulsarRecordConsumer(final PulsarDestinationConfig pulsarDestinationConfig, + final ConfiguredAirbyteCatalog catalog, + final PulsarClient pulsarClient, + final Consumer outputRecordCollector, + final NamingConventionTransformer nameTransformer) { + this.config = pulsarDestinationConfig; + this.producerMap = new HashMap<>(); + this.catalog = catalog; + this.outputRecordCollector = outputRecordCollector; + this.nameTransformer = nameTransformer; + this.client = pulsarClient; + } + + @Override + protected void startTracked() { + producerMap.putAll(buildProducerMap()); + } + + @Override + protected void acceptTracked(final AirbyteMessage airbyteMessage) { + if (airbyteMessage.getType() == AirbyteMessage.Type.STATE) { + outputRecordCollector.accept(airbyteMessage); + } else if (airbyteMessage.getType() == AirbyteMessage.Type.RECORD) { + final AirbyteRecordMessage recordMessage = airbyteMessage.getRecord(); + final Producer producer = producerMap.get(AirbyteStreamNameNamespacePair.fromRecordMessage(recordMessage)); + final String key = UUID.randomUUID().toString(); + final GenericRecord value = Schema.generic(PulsarDestinationConfig.getSchemaInfo()) + .newRecordBuilder() + .set(PulsarDestination.COLUMN_NAME_AB_ID, key) + .set(PulsarDestination.COLUMN_NAME_STREAM, recordMessage.getStream()) + .set(PulsarDestination.COLUMN_NAME_EMITTED_AT, recordMessage.getEmittedAt()) + .set(PulsarDestination.COLUMN_NAME_DATA, recordMessage.getData().toString().getBytes(StandardCharsets.UTF_8)) + .build(); + + sendRecord(producer, value); + } else { + LOGGER.warn("Unexpected message: " + airbyteMessage.getType()); + } + } + + Map> buildProducerMap() { + return catalog.getStreams().stream() + .map(stream -> AirbyteStreamNameNamespacePair.fromAirbyteStream(stream.getStream())) + .collect(Collectors.toMap(Function.identity(), pair -> { + String topic = nameTransformer.getIdentifier(config.getTopicPattern() + .replaceAll("\\{namespace}", 
Optional.ofNullable(pair.getNamespace()).orElse("")) + .replaceAll("\\{stream}", Optional.ofNullable(pair.getName()).orElse(""))); + return PulsarUtils.buildProducer(client, Schema.generic(PulsarDestinationConfig.getSchemaInfo()), config.getProducerConfig(), + config.uriForTopic(topic)); + }, (existing, newValue) -> existing)); + } + + private void sendRecord(final Producer producer, final GenericRecord record) { + producer.sendAsync(record); + if (config.isSync()) { + try { + producer.flush(); + } catch (PulsarClientException e) { + LOGGER.error("Error sending message to topic.", e); + throw new RuntimeException("Cannot send message to Pulsar. Error: " + e.getMessage(), e); + } + } + } + + @Override + protected void close(final boolean hasFailed) { + producerMap.values().forEach(producer -> { + Exceptions.swallow(producer::flush); + Exceptions.swallow(producer::close); + }); + Exceptions.swallow(client::close); + } + +} diff --git a/airbyte-integrations/connectors/destination-pulsar/src/main/java/io/airbyte/integrations/destination/pulsar/PulsarUtils.java b/airbyte-integrations/connectors/destination-pulsar/src/main/java/io/airbyte/integrations/destination/pulsar/PulsarUtils.java new file mode 100644 index 0000000..d851cc6 --- /dev/null +++ b/airbyte-integrations/connectors/destination-pulsar/src/main/java/io/airbyte/integrations/destination/pulsar/PulsarUtils.java @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.pulsar; + +import java.util.Map; +import org.apache.pulsar.client.api.Producer; +import org.apache.pulsar.client.api.PulsarClient; +import org.apache.pulsar.client.api.PulsarClientException; +import org.apache.pulsar.client.api.Schema; +import org.apache.pulsar.client.api.schema.GenericRecord; + +class PulsarUtils { + + static PulsarClient buildClient(final String serviceUrl) { + try { + return PulsarClient.builder() + .serviceUrl(serviceUrl) + .build(); + } catch (PulsarClientException e) { + throw new RuntimeException("Error creating the Pulsar client", e); + } + } + + static Producer buildProducer(final PulsarClient client, + final Schema schema, + final Map config, + final String topic) { + try { + return client.newProducer(schema) + .loadConf(config) + .topic(topic) + .create(); + } catch (PulsarClientException e) { + throw new RuntimeException("Error creating the Pulsar producer", e); + } + } + +} diff --git a/airbyte-integrations/connectors/destination-pulsar/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-pulsar/src/main/resources/spec.json new file mode 100644 index 0000000..e31691e --- /dev/null +++ b/airbyte-integrations/connectors/destination-pulsar/src/main/resources/spec.json @@ -0,0 +1,137 @@ +{ + "documentationUrl": "https://docs.airbyte.com/integrations/destinations/pulsar", + "supportsIncremental": true, + "supportsNormalization": false, + "supportsDBT": false, + "supported_destination_sync_modes": ["append"], + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Pulsar Destination Spec", + "type": "object", + "required": [ + "brokers", + "use_tls", + "topic_type", + "topic_tenant", + "topic_namespace", + "topic_pattern", + "compression_type", + "send_timeout_ms", + "max_pending_messages", + "max_pending_messages_across_partitions", + "batching_enabled", + "batching_max_messages", + "batching_max_publish_delay", + "block_if_queue_full" + ], + "additionalProperties": true, + "properties": { + "brokers": { + 
"title": "Pulsar brokers", + "description": "A list of host/port pairs to use for establishing the initial connection to the Pulsar cluster.", + "type": "string", + "examples": ["broker1:6650,broker2:6650"] + }, + "use_tls": { + "title": "Use TLS", + "description": "Whether to use TLS encryption on the connection.", + "type": "boolean", + "default": false + }, + "topic_type": { + "title": "Topic type", + "description": "It identifies type of topic. Pulsar supports two kind of topics: persistent and non-persistent. In persistent topic, all messages are durably persisted on disk (that means on multiple disks unless the broker is standalone), whereas non-persistent topic does not persist message into storage disk.", + "type": "string", + "default": "persistent", + "enum": ["persistent", "non-persistent"] + }, + "topic_tenant": { + "title": "Topic tenant", + "description": "The topic tenant within the instance. Tenants are essential to multi-tenancy in Pulsar, and spread across clusters.", + "type": "string", + "default": "public", + "examples": ["public"] + }, + "topic_namespace": { + "title": "Topic namespace", + "description": "The administrative unit of the topic, which acts as a grouping mechanism for related topics. Most topic configuration is performed at the namespace level. Each tenant has one or multiple namespaces.", + "type": "string", + "default": "default", + "examples": ["default"] + }, + "topic_pattern": { + "title": "Topic pattern", + "description": "Topic pattern in which the records will be sent. You can use patterns like '{namespace}' and/or '{stream}' to send the message to a specific topic based on these values. Notice that the topic name will be transformed to a standard naming convention.", + "type": "string", + "examples": ["sample.topic", "{namespace}.{stream}.sample"] + }, + "topic_test": { + "title": "Test topic", + "description": "Topic to test if Airbyte can produce messages.", + "type": "string", + "examples": ["test.topic"] + }, + "producer_name": { + "title": "Producer name", + "description": "Name for the producer. 
If not filled, the system will generate a globally unique name which can be accessed with.", + "type": "string", + "examples": ["airbyte-producer"] + }, + "producer_sync": { + "title": "Sync producer", + "description": "Wait synchronously until the record has been sent to Pulsar.", + "type": "boolean", + "default": false + }, + "compression_type": { + "title": "Compression type", + "description": "Compression type for the producer.", + "type": "string", + "default": "NONE", + "enum": ["NONE", "LZ4", "ZLIB", "ZSTD", "SNAPPY"] + }, + "send_timeout_ms": { + "title": "Message send timeout", + "description": "If a message is not acknowledged by a server before the send-timeout expires, an error occurs (in ms).", + "type": "integer", + "default": 30000 + }, + "max_pending_messages": { + "title": "Max pending messages", + "description": "The maximum size of a queue holding pending messages.", + "type": "integer", + "default": 1000 + }, + "max_pending_messages_across_partitions": { + "title": "Max pending messages across partitions", + "description": "The maximum number of pending messages across partitions.", + "type": "integer", + "default": 50000 + }, + "batching_enabled": { + "title": "Enable batching", + "description": "Control whether automatic batching of messages is enabled for the producer.", + "type": "boolean", + "default": true + }, + "batching_max_messages": { + "title": "Batching max messages", + "description": "Maximum number of messages permitted in a batch.", + "type": "integer", + "default": 1000 + }, + "batching_max_publish_delay": { + "title": "Batching max publish delay", + "description": " Time period in milliseconds within which the messages sent will be batched.", + "type": "integer", + "default": 1 + }, + "block_if_queue_full": { + "title": "Block if queue is full", + "description": "If the send operation should block when the outgoing message queue is full.", + "type": "boolean", + "default": false + } + } + } +} diff --git a/airbyte-integrations/connectors/destination-pulsar/src/test-integration/java/io/airbyte/integrations/destination/pulsar/PulsarDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-pulsar/src/test-integration/java/io/airbyte/integrations/destination/pulsar/PulsarDestinationAcceptanceTest.java new file mode 100644 index 0000000..8b2dcd6 --- /dev/null +++ b/airbyte-integrations/connectors/destination-pulsar/src/test-integration/java/io/airbyte/integrations/destination/pulsar/PulsarDestinationAcceptanceTest.java @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.pulsar; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.ObjectReader; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Streams; +import com.google.common.net.InetAddresses; +import io.airbyte.cdk.integrations.destination.NamingConventionTransformer; +import io.airbyte.cdk.integrations.destination.StandardNameTransformer; +import io.airbyte.cdk.integrations.standardtest.destination.DestinationAcceptanceTest; +import io.airbyte.cdk.integrations.standardtest.destination.comparator.AdvancedTestDataComparator; +import io.airbyte.cdk.integrations.standardtest.destination.comparator.TestDataComparator; +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.lang.Exceptions; +import java.io.IOException; +import java.net.InetAddress; +import java.net.NetworkInterface; +import java.net.SocketException; +import java.net.UnknownHostException; +import java.util.ArrayList; +import java.util.Base64; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.UUID; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.apache.pulsar.client.api.Consumer; +import org.apache.pulsar.client.api.Message; +import org.apache.pulsar.client.api.PulsarClient; +import org.apache.pulsar.client.api.Schema; +import org.apache.pulsar.client.api.SubscriptionInitialPosition; +import org.apache.pulsar.client.api.SubscriptionType; +import org.apache.pulsar.client.api.schema.GenericRecord; +import org.testcontainers.containers.PulsarContainer; +import org.testcontainers.utility.DockerImageName; + +public class PulsarDestinationAcceptanceTest extends DestinationAcceptanceTest { + + private static final String TOPIC_NAME = "test.topic"; + private static final ObjectReader READER = new ObjectMapper().reader(); + + private static PulsarContainer PULSAR; + + private final NamingConventionTransformer namingResolver = new StandardNameTransformer(); + + @Override + protected String getImageName() { + return "airbyte/destination-pulsar:dev"; + } + + @Override + protected JsonNode getConfig() throws UnknownHostException { + String brokers = Stream.concat(getIpAddresses().stream(), Stream.of("localhost")) + .map(ip -> ip + ":" + PULSAR.getMappedPort(PulsarContainer.BROKER_PORT)) + .collect(Collectors.joining(",")); + return Jsons.jsonNode(ImmutableMap.builder() + .put("brokers", brokers) + .put("use_tls", false) + .put("topic_type", "persistent") + .put("topic_tenant", "public") + .put("topic_namespace", "default") + .put("topic_pattern", "{namespace}.{stream}." + TOPIC_NAME) + .put("producer_name", "test-producer-" + UUID.randomUUID()) + .put("producer_sync", true) + .put("compression_type", "NONE") + .put("send_timeout_ms", 30000) + .put("max_pending_messages", 1000) + .put("max_pending_messages_across_partitions", 50000) + .put("batching_enabled", false) + .put("batching_max_messages", 1000) + .put("batching_max_publish_delay", 1) + .put("block_if_queue_full", true) + .build()); + } + + @Override + protected JsonNode getFailCheckConfig() { + return Jsons.jsonNode(ImmutableMap.builder() + .put("brokers", PULSAR.getHost() + ":" + PULSAR.getMappedPort(PulsarContainer.BROKER_PORT)) + .put("use_tls", false) + .put("topic_pattern", "{namespace}.{stream}." 
+ TOPIC_NAME) + .put("producer_sync", true) + .put("producer_name", "test-producer") + .put("compression_type", "NONE") + .put("send_timeout_ms", 30000) + .put("max_pending_messages", 1000) + .put("max_pending_messages_across_partitions", 50000) + .put("block_if_queue_full", true) + .build()); + } + + @Override + protected boolean implementsNamespaces() { + return true; + } + + @Override + protected String getDefaultSchema(final JsonNode config) { + return ""; + } + + @Override + protected TestDataComparator getTestDataComparator() { + return new AdvancedTestDataComparator(); + } + + @Override + protected boolean supportBasicDataTypeTest() { + return true; + } + + @Override + protected boolean supportArrayDataTypeTest() { + return true; + } + + @Override + protected boolean supportObjectDataTypeTest() { + return true; + } + + @Override + protected List retrieveNormalizedRecords(final TestDestinationEnv testEnv, final String streamName, final String namespace) + throws IOException { + return retrieveRecords(testEnv, streamName, namespace, null); + } + + @Override + protected List retrieveRecords(final TestDestinationEnv testEnv, + final String streamName, + final String namespace, + final JsonNode streamSchema) + throws IOException { + final PulsarClient client = PulsarClient.builder() + .serviceUrl(PULSAR.getPulsarBrokerUrl()) + .build(); + final String topic = namingResolver.getIdentifier(namespace + "." + streamName + "." + TOPIC_NAME); + final Consumer consumer = client.newConsumer(Schema.AUTO_CONSUME()) + .topic(topic) + .subscriptionName("test-subscription-" + UUID.randomUUID()) + .enableRetry(true) + .subscriptionType(SubscriptionType.Exclusive) + .subscriptionInitialPosition(SubscriptionInitialPosition.Earliest) + .subscribe(); + + final List records = new ArrayList<>(); + while (!consumer.hasReachedEndOfTopic()) { + Message message = consumer.receive(5, TimeUnit.SECONDS); + if (message == null) { + break; + } + records.add(READER.readTree(Base64.getDecoder().decode(message.getValue().getField(PulsarDestination.COLUMN_NAME_DATA).toString()))); + Exceptions.swallow(() -> consumer.acknowledge(message)); + } + consumer.unsubscribe(); + consumer.close(); + client.close(); + + return records; + } + + @SuppressWarnings("UnstableApiUsage") + private List getIpAddresses() throws UnknownHostException { + try { + return Streams.stream(NetworkInterface.getNetworkInterfaces().asIterator()) + .flatMap(ni -> Streams.stream(ni.getInetAddresses().asIterator())) + .map(InetAddress::getHostAddress) + .filter(InetAddresses::isUriInetAddress) + .collect(Collectors.toList()); + } catch (SocketException e) { + return Collections.singletonList(InetAddress.getLocalHost().getHostAddress()); + } + } + + @Override + protected void setup(final TestDestinationEnv testEnv, HashSet TEST_SCHEMAS) { + PULSAR = new PulsarContainer(DockerImageName.parse("apachepulsar/pulsar:2.8.1")); + PULSAR.start(); + } + + @Override + protected void tearDown(final TestDestinationEnv testEnv) { + PULSAR.close(); + } + +} diff --git a/airbyte-integrations/connectors/destination-pulsar/src/test/java/io/airbyte/integrations/destination/pulsar/PulsarRecordConsumerTest.java b/airbyte-integrations/connectors/destination-pulsar/src/test/java/io/airbyte/integrations/destination/pulsar/PulsarRecordConsumerTest.java new file mode 100644 index 0000000..07750e2 --- /dev/null +++ b/airbyte-integrations/connectors/destination-pulsar/src/test/java/io/airbyte/integrations/destination/pulsar/PulsarRecordConsumerTest.java @@ -0,0 +1,258 @@ +/* + * 
Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.pulsar; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Sets; +import com.google.common.collect.Streams; +import com.google.common.net.InetAddresses; +import io.airbyte.cdk.integrations.base.FailureTrackingAirbyteMessageConsumer; +import io.airbyte.cdk.integrations.destination.StandardNameTransformer; +import io.airbyte.cdk.integrations.standardtest.destination.PerStreamStateMessageTest; +import io.airbyte.commons.json.Jsons; +import io.airbyte.protocol.models.Field; +import io.airbyte.protocol.models.JsonSchemaType; +import io.airbyte.protocol.models.v0.AirbyteMessage; +import io.airbyte.protocol.models.v0.AirbyteRecordMessage; +import io.airbyte.protocol.models.v0.AirbyteStateMessage; +import io.airbyte.protocol.models.v0.AirbyteStream; +import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair; +import io.airbyte.protocol.models.v0.CatalogHelpers; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream; +import java.net.InetAddress; +import java.net.NetworkInterface; +import java.net.SocketException; +import java.net.UnknownHostException; +import java.time.Instant; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.function.Consumer; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import java.util.stream.Stream; +import org.apache.pulsar.client.api.Producer; +import org.apache.pulsar.client.api.PulsarClient; +import org.apache.pulsar.client.api.schema.GenericRecord; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.api.extension.ExtensionContext; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.ArgumentsProvider; +import org.junit.jupiter.params.provider.ArgumentsSource; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.testcontainers.containers.PulsarContainer; +import org.testcontainers.utility.DockerImageName; + +@DisplayName("PulsarRecordConsumer") +@ExtendWith(MockitoExtension.class) +public class PulsarRecordConsumerTest extends PerStreamStateMessageTest { + + @Mock + private Consumer outputRecordCollector; + + private PulsarRecordConsumer consumer; + + @Mock + private PulsarDestinationConfig config; + + @Mock + private ConfiguredAirbyteCatalog catalog; + + @Mock + private PulsarClient pulsarClient; + + private static final StandardNameTransformer NAMING_RESOLVER = new StandardNameTransformer(); + + private static PulsarContainer PULSAR; + + @ParameterizedTest + @ArgumentsSource(TopicMapArgumentsProvider.class) + @SuppressWarnings("unchecked") + public void testBuildProducerMap(final ConfiguredAirbyteCatalog catalog, + final String streamName, + final String namespace, + final String topicPattern, + final String expectedTopic) + throws UnknownHostException { + String brokers = 
Stream.concat(getIpAddresses().stream(), Stream.of("localhost")) + .map(ip -> ip + ":" + PULSAR.getMappedPort(PulsarContainer.BROKER_PORT)) + .collect(Collectors.joining(",")); + final PulsarDestinationConfig config = PulsarDestinationConfig + .getPulsarDestinationConfig(getConfig(brokers, topicPattern)); + final PulsarClient pulsarClient = PulsarUtils.buildClient(config.getServiceUrl()); + final PulsarRecordConsumer recordConsumer = new PulsarRecordConsumer(config, catalog, pulsarClient, outputRecordCollector, NAMING_RESOLVER); + final Map> producerMap = recordConsumer.buildProducerMap(); + assertEquals(Sets.newHashSet(catalog.getStreams()).size(), producerMap.size()); + + final AirbyteStreamNameNamespacePair streamNameNamespacePair = new AirbyteStreamNameNamespacePair(streamName, namespace); + assertEquals(expectedTopic, producerMap.get(streamNameNamespacePair).getTopic()); + } + + @Test + @SuppressWarnings("unchecked") + void testCannotConnectToBrokers() throws Exception { + final PulsarDestinationConfig config = PulsarDestinationConfig + .getPulsarDestinationConfig(getConfig(PULSAR.getHost() + ":" + (PULSAR.getMappedPort(PulsarContainer.BROKER_PORT) + 10), "test-topic")); + + final String streamName = "test-stream"; + final String namespace = "test-schema"; + final ConfiguredAirbyteCatalog catalog = new ConfiguredAirbyteCatalog().withStreams(List.of( + CatalogHelpers.createConfiguredAirbyteStream( + streamName, + namespace, + Field.of("id", JsonSchemaType.NUMBER), + Field.of("name", JsonSchemaType.STRING)))); + final PulsarClient pulsarClient = PulsarUtils.buildClient(config.getServiceUrl()); + final PulsarRecordConsumer consumer = new PulsarRecordConsumer(config, catalog, pulsarClient, outputRecordCollector, NAMING_RESOLVER); + final List expectedRecords = getNRecords(10, streamName, namespace); + + assertThrows(RuntimeException.class, consumer::start); + + expectedRecords.forEach(m -> assertThrows(RuntimeException.class, () -> consumer.accept(m))); + + consumer.accept(new AirbyteMessage() + .withType(AirbyteMessage.Type.STATE) + .withState(new AirbyteStateMessage().withData(Jsons.jsonNode(ImmutableMap.of(namespace + "." 
+ streamName, 0))))); + consumer.close(); + } + + private JsonNode getConfig(final String brokers, final String topic) { + return Jsons.jsonNode(ImmutableMap.builder() + .put("brokers", brokers) + .put("use_tls", false) + .put("topic_type", "non-persistent") + .put("topic_tenant", "public") + .put("topic_namespace", "default") + .put("topic_pattern", topic) + .put("producer_sync", true) + .put("compression_type", "NONE") + .put("send_timeout_ms", 30000) + .put("max_pending_messages", 1000) + .put("max_pending_messages_across_partitions", 50000) + .put("batching_enabled", true) + .put("batching_max_messages", 1000) + .put("batching_max_publish_delay", 1) + .put("block_if_queue_full", true) + .build()); + } + + private List getNRecords(final int n, final String streamName, final String namespace) { + return IntStream.range(0, n) + .boxed() + .map(i -> new AirbyteMessage() + .withType(AirbyteMessage.Type.RECORD) + .withRecord(new AirbyteRecordMessage() + .withStream(streamName) + .withNamespace(namespace) + .withEmittedAt(Instant.now().toEpochMilli()) + .withData(Jsons.jsonNode(ImmutableMap.of("id", i, "name", "human " + i))))) + .collect(Collectors.toList()); + + } + + @SuppressWarnings("UnstableApiUsage") + private List getIpAddresses() throws UnknownHostException { + try { + return Streams.stream(NetworkInterface.getNetworkInterfaces().asIterator()) + .flatMap(ni -> Streams.stream(ni.getInetAddresses().asIterator())) + .map(InetAddress::getHostAddress) + .filter(InetAddresses::isUriInetAddress) + .collect(Collectors.toList()); + } catch (SocketException e) { + return Collections.singletonList(InetAddress.getLocalHost().getHostAddress()); + } + } + + public static class TopicMapArgumentsProvider implements ArgumentsProvider { + + private static final String TOPIC_NAME = "test.topic"; + private static final String SCHEMA_NAME1 = "public"; + private static final String STREAM_NAME1 = "id_and_name"; + private static final String SCHEMA_NAME2 = SCHEMA_NAME1 + 2; + private static final String STREAM_NAME2 = STREAM_NAME1 + 2; + + private final ConfiguredAirbyteStream stream1 = CatalogHelpers.createConfiguredAirbyteStream( + SCHEMA_NAME1, + STREAM_NAME1, + Field.of("id", JsonSchemaType.NUMBER), + Field.of("name", JsonSchemaType.STRING)); + private final ConfiguredAirbyteStream stream2 = CatalogHelpers.createConfiguredAirbyteStream( + SCHEMA_NAME2, + STREAM_NAME2, + Field.of("id", JsonSchemaType.NUMBER), + Field.of("name", JsonSchemaType.STRING)); + + @Override + public Stream provideArguments(final ExtensionContext context) { + final String prefix = "non-persistent://public/default/"; + + final List catalogs = new ArrayList<>(); + catalogs.add(new ConfiguredAirbyteCatalog().withStreams(List.of(stream1))); + catalogs.add(new ConfiguredAirbyteCatalog().withStreams(List.of(stream1, stream1))); + catalogs.add(new ConfiguredAirbyteCatalog().withStreams(List.of(stream1, stream2))); + + return catalogs.stream() + .flatMap(catalog -> catalog.getStreams().stream() + .map(stream -> buildArgs(catalog, stream.getStream(), prefix)) + .flatMap(Collection::stream)); + } + + private List buildArgs(final ConfiguredAirbyteCatalog catalog, final AirbyteStream stream, final String prefix) { + final String transformedTopic = NAMING_RESOLVER.getIdentifier(TOPIC_NAME); + final String transformedName = NAMING_RESOLVER.getIdentifier(stream.getName()); + final String transformedNamespace = NAMING_RESOLVER.getIdentifier(stream.getNamespace()); + + return ImmutableList.of( + Arguments.of(catalog, stream.getName(), 
stream.getNamespace(), TOPIC_NAME, prefix + "test_topic"), + Arguments.of(catalog, stream.getName(), stream.getNamespace(), "test-topic", prefix + "test_topic"), + Arguments.of(catalog, stream.getName(), stream.getNamespace(), "{namespace}", prefix + transformedNamespace), + Arguments.of(catalog, stream.getName(), stream.getNamespace(), "{stream}", prefix + transformedName), + Arguments.of(catalog, stream.getName(), stream.getNamespace(), "{namespace}.{stream}." + TOPIC_NAME, + prefix + transformedNamespace + "_" + transformedName + "_" + transformedTopic), + Arguments.of(catalog, stream.getName(), stream.getNamespace(), "{namespace}-{stream}-" + TOPIC_NAME, + prefix + transformedNamespace + "_" + transformedName + "_" + transformedTopic), + Arguments.of(catalog, stream.getName(), stream.getNamespace(), "topic with spaces", prefix + "topic_with_spaces")); + } + + } + + @Override + protected Consumer getMockedConsumer() { + return outputRecordCollector; + } + + @Override + protected FailureTrackingAirbyteMessageConsumer getMessageConsumer() { + return consumer; + } + + @BeforeEach + void setup() { + // TODO: Unit tests should not use Testcontainers + PULSAR = new PulsarContainer(DockerImageName.parse("apachepulsar/pulsar:2.8.1")); + PULSAR.start(); + consumer = new PulsarRecordConsumer(config, catalog, pulsarClient, outputRecordCollector, NAMING_RESOLVER); + } + + @AfterEach + void tearDown() { + PULSAR.close(); + } + +} diff --git a/airbyte-integrations/connectors/destination-r2/README.md b/airbyte-integrations/connectors/destination-r2/README.md new file mode 100644 index 0000000..229c1d0 --- /dev/null +++ b/airbyte-integrations/connectors/destination-r2/README.md @@ -0,0 +1,74 @@ +# Destination R2 + +This is the repository for the R2 destination connector in Java. +For information about how to use this connector within Airbyte, see [the User Documentation](https://docs.airbyte.io/integrations/destinations/r2). + +**Currently, no integration test has been set up for this connector, which requires either a local R2 container, or a remote R2 account.** + +## Local development + +#### Building via Gradle +From the Airbyte repository root, run: +``` +./gradlew :airbyte-integrations:connectors:destination-r2:build +``` + +#### Create credentials +**If you are a community contributor**, generate the necessary credentials and place them in `secrets/config.json` conforming to the spec file in `src/main/resources/spec.json`. +Note that the `secrets` directory is git-ignored by default, so there is no danger of accidentally checking in sensitive information. + +**If you are an Airbyte core member**, follow the [instructions](https://docs.airbyte.io/connector-development#using-credentials-in-ci) to set up the credentials. + +### Locally running the connector docker image + +#### Build +Build the connector image via Gradle: + +``` +./gradlew :airbyte-integrations:connectors:destination-r2:buildConnectorImage +``` +Once built, the docker image name and tag on your host will be `airbyte/destination-r2:dev`. +the Dockerfile. 
+ + +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/destination-r2:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-r2:dev check --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-r2:dev discover --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/destination-r2:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json +``` + +## Testing +We use `JUnit` for Java tests. + +### Unit and Integration Tests +Place unit tests under `src/test/java/io/airbyte/integrations/destination/r2`. + +#### Acceptance Tests +Airbyte has a standard test suite that all destination connectors must pass. Implement the `TODO`s in the acceptance tests under +`src/test-integration/java/io/airbyte/integrations/destination/r2/`. + +### Using Gradle to run tests +All commands should be run from the Airbyte project root. +To run unit tests: +``` +./gradlew :airbyte-integrations:connectors:destination-r2:unitTest +``` +To run acceptance and custom integration tests: +``` +./gradlew :airbyte-integrations:connectors:destination-r2:integrationTest +``` + +## Dependency Management + +### Publishing a new version of the connector +You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what? +1. Make sure your changes are passing our test suite: `airbyte-ci connectors --name=destination-r2 test` +2. Bump the connector version in `metadata.yaml`: increment the `dockerImageTag` value. Please follow [semantic versioning for connectors](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#semantic-versioning-for-connectors). +3. Make sure the `metadata.yaml` content is up to date. +4. Make sure the connector documentation and its changelog are up to date (`docs/integrations/destinations/r2.md`). +5. Create a Pull Request: use [our PR naming conventions](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#pull-request-title-convention). +6. Pat yourself on the back for being an awesome contributor. +7. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master. 
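+
+For local iteration (see the Create credentials section above), the connector commands expect a `secrets/config.json` that satisfies the spec. A minimal sketch, assuming the field names defined in `src/main/resources/spec.json` in this change and mirroring `sample_secrets/config.json` (all values are placeholders):
+```json
+{
+  "account_id": "<cloudflare-account-id>",
+  "access_key_id": "<r2-access-key-id>",
+  "secret_access_key": "<r2-secret-access-key>",
+  "s3_bucket_name": "<bucket-name>",
+  "s3_bucket_path": "integration-test",
+  "format": { "format_type": "JSONL" }
+}
+```
+The `secrets` directory is git-ignored, so a file like this will not be committed accidentally.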
+ diff --git a/airbyte-integrations/connectors/destination-r2/build.gradle b/airbyte-integrations/connectors/destination-r2/build.gradle new file mode 100644 index 0000000..94626b9 --- /dev/null +++ b/airbyte-integrations/connectors/destination-r2/build.gradle @@ -0,0 +1,49 @@ +plugins { + id 'application' + id 'airbyte-java-connector' +} + +airbyteJavaConnector { + cdkVersionRequired = '0.2.0' + features = ['db-destinations'] + useLocalCdk = false +} + +//remove once upgrading the CDK version to 0.4.x or later +java { + compileJava { + options.compilerArgs.remove("-Werror") + } +} + +airbyteJavaConnector.addCdkDependencies() + +application { + mainClass = 'io.airbyte.integrations.destination.r2.R2Destination' + applicationDefaultJvmArgs = ['-XX:+ExitOnOutOfMemoryError', '-XX:MaxRAMPercentage=75.0'] +} + +dependencies { + + // csv + implementation 'com.amazonaws:aws-java-sdk-s3:1.11.978' + implementation 'org.apache.commons:commons-csv:1.4' + implementation 'com.github.alexmojaki:s3-stream-upload:2.2.2' + + // parquet + implementation ('org.apache.hadoop:hadoop-common:3.3.3') { + exclude group: 'org.slf4j', module: 'slf4j-log4j12' + exclude group: 'org.slf4j', module: 'slf4j-reload4j' + } + implementation ('org.apache.hadoop:hadoop-aws:3.3.3') { exclude group: 'org.slf4j', module: 'slf4j-log4j12'} + implementation ('org.apache.hadoop:hadoop-mapreduce-client-core:3.3.3') { + exclude group: 'org.slf4j', module: 'slf4j-log4j12' + exclude group: 'org.slf4j', module: 'slf4j-reload4j' + } + implementation ('org.apache.parquet:parquet-avro:1.12.3') { exclude group: 'org.slf4j', module: 'slf4j-log4j12'} + implementation ('com.github.airbytehq:json-avro-converter:1.1.0') { exclude group: 'ch.qos.logback', module: 'logback-classic'} + + testImplementation 'org.apache.commons:commons-lang3:3.11' + testImplementation 'org.xerial.snappy:snappy-java:1.1.8.4' + testImplementation "org.mockito:mockito-inline:4.1.0" +} diff --git a/airbyte-integrations/connectors/destination-r2/icon.svg b/airbyte-integrations/connectors/destination-r2/icon.svg new file mode 100644 index 0000000..f9b9e46 --- /dev/null +++ b/airbyte-integrations/connectors/destination-r2/icon.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/airbyte-integrations/connectors/destination-r2/metadata.yaml b/airbyte-integrations/connectors/destination-r2/metadata.yaml new file mode 100644 index 0000000..d9a0911 --- /dev/null +++ b/airbyte-integrations/connectors/destination-r2/metadata.yaml @@ -0,0 +1,24 @@ +data: + connectorSubtype: file + connectorType: destination + definitionId: 0fb07be9-7c3b-4336-850d-5efc006152ee + dockerImageTag: 0.1.0 + dockerRepository: airbyte/destination-r2 + githubIssueLabel: destination-r2 + icon: cloudflare-r2.svg + license: MIT + name: Cloudflare R2 + registries: + cloud: + enabled: false + oss: + enabled: false + releaseStage: alpha + documentationUrl: https://docs.airbyte.com/integrations/destinations/r2 + tags: + - language:java + ab_internal: + sl: 100 + ql: 100 + supportLevel: archived +metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/destination-r2/sample_secrets/config.json b/airbyte-integrations/connectors/destination-r2/sample_secrets/config.json new file mode 100644 index 0000000..db86c13 --- /dev/null +++ b/airbyte-integrations/connectors/destination-r2/sample_secrets/config.json @@ -0,0 +1,7 @@ +{ + "s3_bucket_name": "paste-bucket-name-here", + "s3_bucket_path": "integration-test", + "account_id": "paster-account-id-here", + "access_key_id": "paste-access-key-id-here", 
+ "secret_access_key": "paste-secret-access-key-here" +} diff --git a/airbyte-integrations/connectors/destination-r2/src/main/java/io/airbyte/integrations/destination/r2/R2Destination.java b/airbyte-integrations/connectors/destination-r2/src/main/java/io/airbyte/integrations/destination/r2/R2Destination.java new file mode 100644 index 0000000..1c663ed --- /dev/null +++ b/airbyte-integrations/connectors/destination-r2/src/main/java/io/airbyte/integrations/destination/r2/R2Destination.java @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.r2; + +import io.airbyte.cdk.integrations.base.IntegrationRunner; +import io.airbyte.cdk.integrations.destination.s3.BaseS3Destination; +import io.airbyte.cdk.integrations.destination.s3.StorageProvider; + +public class R2Destination extends BaseS3Destination { + + public static void main(String[] args) throws Exception { + System.setProperty("com.amazonaws.services.s3.disableGetObjectMD5Validation", "true"); + System.setProperty("com.amazonaws.services.s3.disablePutObjectMD5Validation", "true"); + new IntegrationRunner(new R2Destination()).run(args); + } + + @Override + public StorageProvider storageProvider() { + return StorageProvider.CF_R2; + } + +} diff --git a/airbyte-integrations/connectors/destination-r2/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-r2/src/main/resources/spec.json new file mode 100644 index 0000000..5274dcf --- /dev/null +++ b/airbyte-integrations/connectors/destination-r2/src/main/resources/spec.json @@ -0,0 +1,296 @@ +{ + "documentationUrl": "https://docs.airbyte.com/integrations/destinations/r2", + "supportsIncremental": true, + "supportsNormalization": false, + "supportsDBT": false, + "supported_destination_sync_modes": ["overwrite", "append"], + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "R2 Destination Spec", + "type": "object", + "required": [ + "account_id", + "access_key_id", + "secret_access_key", + "s3_bucket_name", + "s3_bucket_path", + "format" + ], + + "properties": { + "account_id": { + "type": "string", + "description": "Cloudflare account ID", + "title": "Cloudflare account ID", + "examples": ["12345678aa1a1a11111aaa1234567abc"], + "order": 0 + }, + "access_key_id": { + "type": "string", + "description": "The access key ID to access the R2 bucket. Airbyte requires Read and Write permissions to the given bucket. Read more here.", + "title": "R2 Key ID", + "airbyte_secret": true, + "examples": ["A012345678910EXAMPLE"], + "order": 1 + }, + "secret_access_key": { + "type": "string", + "description": "The corresponding secret to the access key ID. Read more here", + "title": "R2 Access Key", + "airbyte_secret": true, + "examples": ["a012345678910ABCDEFGHAbCdEfGhEXAMPLEKEY"], + "order": 2 + }, + "s3_bucket_name": { + "title": "R2 Bucket Name", + "type": "string", + "description": "The name of the R2 bucket. Read more here.", + "examples": ["r2_sync"], + "order": 3 + }, + "s3_bucket_path": { + "title": "R2 Bucket Path", + "description": "Directory under the R2 bucket where data will be written.", + "type": "string", + "examples": ["data_sync/test"], + "order": 4 + }, + "format": { + "title": "Output Format", + "type": "object", + "description": "Format of the data output. 
See here for more details", + "oneOf": [ + { + "title": "Avro: Apache Avro", + "required": ["format_type", "compression_codec"], + "properties": { + "format_type": { + "title": "Format Type", + "type": "string", + "enum": ["Avro"], + "default": "Avro", + "order": 0 + }, + "compression_codec": { + "title": "Compression Codec", + "description": "The compression algorithm used to compress data. Default to no compression.", + "type": "object", + "oneOf": [ + { + "title": "No Compression", + "required": ["codec"], + "properties": { + "codec": { + "type": "string", + "enum": ["no compression"], + "default": "no compression" + } + } + }, + { + "title": "Deflate", + "required": ["codec", "compression_level"], + "properties": { + "codec": { + "type": "string", + "enum": ["Deflate"], + "default": "Deflate" + }, + "compression_level": { + "title": "Deflate Level", + "description": "0: no compression & fastest, 9: best compression & slowest.", + "type": "integer", + "default": 0, + "minimum": 0, + "maximum": 9 + } + } + }, + { + "title": "bzip2", + "required": ["codec"], + "properties": { + "codec": { + "type": "string", + "enum": ["bzip2"], + "default": "bzip2" + } + } + }, + { + "title": "xz", + "required": ["codec", "compression_level"], + "properties": { + "codec": { + "type": "string", + "enum": ["xz"], + "default": "xz" + }, + "compression_level": { + "title": "Compression Level", + "description": "See here for details.", + "type": "integer", + "default": 6, + "minimum": 0, + "maximum": 9 + } + } + }, + { + "title": "zstandard", + "required": ["codec", "compression_level"], + "properties": { + "codec": { + "type": "string", + "enum": ["zstandard"], + "default": "zstandard" + }, + "compression_level": { + "title": "Compression Level", + "description": "Negative levels are 'fast' modes akin to lz4 or snappy, levels above 9 are generally for archival purposes, and levels above 18 use a lot of memory.", + "type": "integer", + "default": 3, + "minimum": -5, + "maximum": 22 + }, + "include_checksum": { + "title": "Include Checksum", + "description": "If true, include a checksum with each data block.", + "type": "boolean", + "default": false + } + } + }, + { + "title": "snappy", + "required": ["codec"], + "properties": { + "codec": { + "type": "string", + "enum": ["snappy"], + "default": "snappy" + } + } + } + ], + "order": 1 + } + } + }, + { + "title": "CSV: Comma-Separated Values", + "required": ["format_type", "flattening"], + "properties": { + "format_type": { + "title": "Format Type", + "type": "string", + "enum": ["CSV"], + "default": "CSV" + }, + "flattening": { + "type": "string", + "title": "Normalization (Flattening)", + "description": "Whether the input json data should be normalized (flattened) in the output CSV. Please refer to docs for details.", + "default": "No flattening", + "enum": ["No flattening", "Root level flattening"] + }, + "compression": { + "title": "Compression", + "type": "object", + "description": "Whether the output files should be compressed. 
If compression is selected, the output filename will have an extra extension (GZIP: \".csv.gz\").", + "oneOf": [ + { + "title": "No Compression", + "requires": ["compression_type"], + "properties": { + "compression_type": { + "type": "string", + "enum": ["No Compression"], + "default": "No Compression" + } + } + }, + { + "title": "GZIP", + "requires": ["compression_type"], + "properties": { + "compression_type": { + "type": "string", + "enum": ["GZIP"], + "default": "GZIP" + } + } + } + ] + } + } + }, + { + "title": "JSON Lines: Newline-delimited JSON", + "required": ["format_type"], + "properties": { + "format_type": { + "title": "Format Type", + "type": "string", + "enum": ["JSONL"], + "default": "JSONL" + }, + "compression": { + "title": "Compression", + "type": "object", + "description": "Whether the output files should be compressed. If compression is selected, the output filename will have an extra extension (GZIP: \".jsonl.gz\").", + "oneOf": [ + { + "title": "No Compression", + "requires": "compression_type", + "properties": { + "compression_type": { + "type": "string", + "enum": ["No Compression"], + "default": "No Compression" + } + } + }, + { + "title": "GZIP", + "requires": "compression_type", + "properties": { + "compression_type": { + "type": "string", + "enum": ["GZIP"], + "default": "GZIP" + } + } + } + ] + } + } + } + ], + "order": 5 + }, + "s3_path_format": { + "title": "R2 Path Format", + "description": "Format string on how data will be organized inside the R2 bucket directory. Read more here", + "type": "string", + "examples": [ + "${NAMESPACE}/${STREAM_NAME}/${YEAR}_${MONTH}_${DAY}_${EPOCH}_" + ], + "order": 6 + }, + "file_name_pattern": { + "type": "string", + "description": "The pattern allows you to set the file-name format for the R2 staging file(s)", + "title": "R2 Filename pattern", + "examples": [ + "{date}", + "{date:yyyy_MM}", + "{timestamp}", + "{part_number}", + "{sync_id}" + ], + "order": 7 + } + } + } +} diff --git a/airbyte-integrations/connectors/destination-r2/src/test-integration/java/io/airbyte/integrations/destination/r2/R2AvroDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-r2/src/test-integration/java/io/airbyte/integrations/destination/r2/R2AvroDestinationAcceptanceTest.java new file mode 100644 index 0000000..ac4b526 --- /dev/null +++ b/airbyte-integrations/connectors/destination-r2/src/test-integration/java/io/airbyte/integrations/destination/r2/R2AvroDestinationAcceptanceTest.java @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.r2; + +import io.airbyte.cdk.integrations.destination.s3.S3BaseAvroDestinationAcceptanceTest; +import io.airbyte.cdk.integrations.destination.s3.StorageProvider; + +public class R2AvroDestinationAcceptanceTest extends S3BaseAvroDestinationAcceptanceTest { + + @Override + protected String getImageName() { + return "airbyte/destination-r2:dev"; + } + + @Override + public StorageProvider storageProvider() { + return StorageProvider.CF_R2; + } + +} diff --git a/airbyte-integrations/connectors/destination-r2/src/test-integration/java/io/airbyte/integrations/destination/r2/R2CsvDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-r2/src/test-integration/java/io/airbyte/integrations/destination/r2/R2CsvDestinationAcceptanceTest.java new file mode 100644 index 0000000..a9f2b72 --- /dev/null +++ b/airbyte-integrations/connectors/destination-r2/src/test-integration/java/io/airbyte/integrations/destination/r2/R2CsvDestinationAcceptanceTest.java @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.r2; + +import io.airbyte.cdk.integrations.destination.s3.S3BaseCsvDestinationAcceptanceTest; +import io.airbyte.cdk.integrations.destination.s3.StorageProvider; + +public class R2CsvDestinationAcceptanceTest extends S3BaseCsvDestinationAcceptanceTest { + + @Override + protected String getImageName() { + return "airbyte/destination-r2:dev"; + } + + @Override + public StorageProvider storageProvider() { + return StorageProvider.CF_R2; + } + +} diff --git a/airbyte-integrations/connectors/destination-r2/src/test-integration/java/io/airbyte/integrations/destination/r2/R2CsvGzipDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-r2/src/test-integration/java/io/airbyte/integrations/destination/r2/R2CsvGzipDestinationAcceptanceTest.java new file mode 100644 index 0000000..3b6df01 --- /dev/null +++ b/airbyte-integrations/connectors/destination-r2/src/test-integration/java/io/airbyte/integrations/destination/r2/R2CsvGzipDestinationAcceptanceTest.java @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.r2; + +import io.airbyte.cdk.integrations.destination.s3.S3BaseCsvGzipDestinationAcceptanceTest; +import io.airbyte.cdk.integrations.destination.s3.StorageProvider; + +public class R2CsvGzipDestinationAcceptanceTest extends S3BaseCsvGzipDestinationAcceptanceTest { + + @Override + protected String getImageName() { + return "airbyte/destination-r2:dev"; + } + + @Override + public StorageProvider storageProvider() { + return StorageProvider.CF_R2; + } + +} diff --git a/airbyte-integrations/connectors/destination-r2/src/test-integration/java/io/airbyte/integrations/destination/r2/R2JsonlDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-r2/src/test-integration/java/io/airbyte/integrations/destination/r2/R2JsonlDestinationAcceptanceTest.java new file mode 100644 index 0000000..0385e1c --- /dev/null +++ b/airbyte-integrations/connectors/destination-r2/src/test-integration/java/io/airbyte/integrations/destination/r2/R2JsonlDestinationAcceptanceTest.java @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.r2; + +import io.airbyte.cdk.integrations.destination.s3.S3BaseJsonlDestinationAcceptanceTest; +import io.airbyte.cdk.integrations.destination.s3.StorageProvider; + +public class R2JsonlDestinationAcceptanceTest extends S3BaseJsonlDestinationAcceptanceTest { + + @Override + protected String getImageName() { + return "airbyte/destination-r2:dev"; + } + + @Override + public StorageProvider storageProvider() { + return StorageProvider.CF_R2; + } + +} diff --git a/airbyte-integrations/connectors/destination-r2/src/test-integration/java/io/airbyte/integrations/destination/r2/R2JsonlGzipDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-r2/src/test-integration/java/io/airbyte/integrations/destination/r2/R2JsonlGzipDestinationAcceptanceTest.java new file mode 100644 index 0000000..13506ab --- /dev/null +++ b/airbyte-integrations/connectors/destination-r2/src/test-integration/java/io/airbyte/integrations/destination/r2/R2JsonlGzipDestinationAcceptanceTest.java @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.r2; + +import io.airbyte.cdk.integrations.destination.s3.S3BaseJsonlGzipDestinationAcceptanceTest; +import io.airbyte.cdk.integrations.destination.s3.StorageProvider; + +public class R2JsonlGzipDestinationAcceptanceTest extends S3BaseJsonlGzipDestinationAcceptanceTest { + + @Override + protected String getImageName() { + return "airbyte/destination-r2:dev"; + } + + @Override + public StorageProvider storageProvider() { + return StorageProvider.CF_R2; + } + +} diff --git a/airbyte-integrations/connectors/destination-r2/src/test-integration/java/io/airbyte/integrations/destination/r2/R2ParquetDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-r2/src/test-integration/java/io/airbyte/integrations/destination/r2/R2ParquetDestinationAcceptanceTest.java new file mode 100644 index 0000000..6393a0e --- /dev/null +++ b/airbyte-integrations/connectors/destination-r2/src/test-integration/java/io/airbyte/integrations/destination/r2/R2ParquetDestinationAcceptanceTest.java @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.r2; + +import io.airbyte.cdk.integrations.destination.s3.S3BaseParquetDestinationAcceptanceTest; +import io.airbyte.cdk.integrations.destination.s3.StorageProvider; +import org.junit.jupiter.api.Disabled; + +/** + * s3a client not supported by cloudflare R2 + */ +@Disabled +public class R2ParquetDestinationAcceptanceTest extends S3BaseParquetDestinationAcceptanceTest { + + @Override + protected String getImageName() { + return "airbyte/destination-r2:dev"; + } + + @Override + public StorageProvider storageProvider() { + return StorageProvider.CF_R2; + } + +} diff --git a/airbyte-integrations/connectors/destination-rabbitmq/.dockerignore b/airbyte-integrations/connectors/destination-rabbitmq/.dockerignore new file mode 100644 index 0000000..f3757e1 --- /dev/null +++ b/airbyte-integrations/connectors/destination-rabbitmq/.dockerignore @@ -0,0 +1,5 @@ +* +!Dockerfile +!main.py +!destination_rabbitmq +!setup.py diff --git a/airbyte-integrations/connectors/destination-rabbitmq/Dockerfile b/airbyte-integrations/connectors/destination-rabbitmq/Dockerfile new file mode 100644 index 0000000..ebf1791 --- /dev/null +++ b/airbyte-integrations/connectors/destination-rabbitmq/Dockerfile @@ -0,0 +1,38 @@ +FROM python:3.9.11-alpine3.15 as base + +# build and load all requirements +FROM base as builder +WORKDIR /airbyte/integration_code + +# upgrade pip to the latest version +RUN apk --no-cache upgrade \ + && pip install --upgrade pip \ + && apk --no-cache add tzdata build-base + + +COPY setup.py ./ +# install necessary packages to a temporary folder +RUN pip install --prefix=/install . + +# build a clean environment +FROM base +WORKDIR /airbyte/integration_code + +# copy all loaded and built libraries to a pure basic image +COPY --from=builder /install /usr/local +# add default timezone settings +COPY --from=builder /usr/share/zoneinfo/Etc/UTC /etc/localtime +RUN echo "Etc/UTC" > /etc/timezone + +# bash is installed for more convenient debugging. +RUN apk --no-cache add bash + +# copy payload code only +COPY main.py ./ +COPY destination_rabbitmq ./destination_rabbitmq + +ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" +ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] + +LABEL io.airbyte.version=0.1.1 +LABEL io.airbyte.name=airbyte/destination-rabbitmq diff --git a/airbyte-integrations/connectors/destination-rabbitmq/README.md b/airbyte-integrations/connectors/destination-rabbitmq/README.md new file mode 100644 index 0000000..f695202 --- /dev/null +++ b/airbyte-integrations/connectors/destination-rabbitmq/README.md @@ -0,0 +1,99 @@ +# Rabbitmq Destination + +This is the repository for the Rabbitmq destination connector, written in Python. +For information about how to use this connector within Airbyte, see [the documentation](https://docs.airbyte.io/integrations/destinations/rabbitmq). + +## Local development + +### Prerequisites +**To iterate on this connector, make sure to complete this prerequisites section.** + +#### Minimum Python version required `= 3.7.0` + +#### Build & Activate Virtual Environment and install dependencies +From this connector directory, create a virtual environment: +``` +python -m venv .venv +``` + +This will generate a virtualenv for this module in `.venv/`. Make sure this venv is active in your +development environment of choice. 
To activate it from the terminal, run: +``` +source .venv/bin/activate +pip install -r requirements.txt +``` +If you are in an IDE, follow your IDE's instructions to activate the virtualenv. + +Note that while we are installing dependencies from `requirements.txt`, you should only edit `setup.py` for your dependencies. `requirements.txt` is +used for editable installs (`pip install -e`) to pull in Python dependencies from the monorepo and will call `setup.py`. +If this is mumbo jumbo to you, don't worry about it, just put your deps in `setup.py` but install using `pip install -r requirements.txt` and everything +should work as you expect. + +#### Create credentials +**If you are a community contributor**, follow the instructions in the [documentation](https://docs.airbyte.io/integrations/destinations/rabbitmq) +to generate the necessary credentials. Then create a file `secrets/config.json` conforming to the `destination_rabbitmq/spec.json` file. +Note that the `secrets` directory is gitignored by default, so there is no danger of accidentally checking in sensitive information. +See `integration_tests/sample_config.json` for a sample config file. + +**If you are an Airbyte core member**, copy the credentials in Lastpass under the secret name `destination rabbitmq test creds` +and place them into `secrets/config.json`. + +### Locally running the connector +``` +python main.py spec +python main.py check --config secrets/config.json +python main.py discover --config secrets/config.json +python main.py read --config secrets/config.json --catalog integration_tests/configured_catalog.json +``` + +### Locally running the connector docker image + + +#### Build +**Via [`airbyte-ci`](https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/pipelines/README.md) (recommended):** +```bash +airbyte-ci connectors --name=destination-rabbitmq build +``` + +An image will be built with the tag `airbyte/destination-rabbitmq:dev`. + +**Via `docker build`:** +```bash +docker build -t airbyte/destination-rabbitmq:dev . +``` + +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/destination-rabbitmq:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-rabbitmq:dev check --config /secrets/config.json +# messages.jsonl is a file containing line-separated JSON representing AirbyteMessages +cat messages.jsonl | docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/destination-rabbitmq:dev write --config /secrets/config.json --catalog /integration_tests/configured_catalog.json +``` + +## Testing +You can run our full test suite locally using [`airbyte-ci`](https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/pipelines/README.md): +```bash +airbyte-ci connectors --name=destination-rabbitmq test +``` + +### Customizing acceptance Tests +Customize `acceptance-test-config.yml` file to configure tests. See [Connector Acceptance Tests](https://docs.airbyte.com/connector-development/testing-connectors/connector-acceptance-tests-reference) for more information. +If your connector requires to create or destroy resources for use during acceptance tests create fixtures for it and place them inside integration_tests/acceptance.py. + +## Dependency Management +All of your dependencies should go in `setup.py`, NOT `requirements.txt`. The requirements file is only used to connect internal Airbyte dependencies in the monorepo for local development. 
+We split dependencies between two groups, dependencies that are: +* required for your connector to work need to go to `MAIN_REQUIREMENTS` list. +* required for the testing need to go to `TEST_REQUIREMENTS` list + +### Publishing a new version of the connector +You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what? +1. Make sure your changes are passing our test suite: `airbyte-ci connectors --name=destination-rabbitmq test` +2. Bump the connector version in `metadata.yaml`: increment the `dockerImageTag` value. Please follow [semantic versioning for connectors](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#semantic-versioning-for-connectors). +3. Make sure the `metadata.yaml` content is up to date. +4. Make the connector documentation and its changelog is up to date (`docs/integrations/destinations/rabbitmq.md`). +5. Create a Pull Request: use [our PR naming conventions](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#pull-request-title-convention). +6. Pat yourself on the back for being an awesome contributor. +7. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master. + diff --git a/airbyte-integrations/connectors/destination-rabbitmq/destination_rabbitmq/__init__.py b/airbyte-integrations/connectors/destination-rabbitmq/destination_rabbitmq/__init__.py new file mode 100644 index 0000000..db4e71f --- /dev/null +++ b/airbyte-integrations/connectors/destination-rabbitmq/destination_rabbitmq/__init__.py @@ -0,0 +1,8 @@ +# +# Copyright (c) 2021 Airbyte, Inc., all rights reserved. +# + + +from .destination import DestinationRabbitmq + +__all__ = ["DestinationRabbitmq"] diff --git a/airbyte-integrations/connectors/destination-rabbitmq/destination_rabbitmq/__init__.pyc b/airbyte-integrations/connectors/destination-rabbitmq/destination_rabbitmq/__init__.pyc new file mode 100644 index 0000000..7f48a3b Binary files /dev/null and b/airbyte-integrations/connectors/destination-rabbitmq/destination_rabbitmq/__init__.pyc differ diff --git a/airbyte-integrations/connectors/destination-rabbitmq/destination_rabbitmq/destination.py b/airbyte-integrations/connectors/destination-rabbitmq/destination_rabbitmq/destination.py new file mode 100644 index 0000000..162a7a0 --- /dev/null +++ b/airbyte-integrations/connectors/destination-rabbitmq/destination_rabbitmq/destination.py @@ -0,0 +1,84 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+# + + +import json +from typing import Any, Iterable, Mapping + +import pika +from airbyte_cdk import AirbyteLogger +from airbyte_cdk.destinations import Destination +from airbyte_cdk.models import AirbyteConnectionStatus, AirbyteMessage, ConfiguredAirbyteCatalog, Status, Type +from pika.adapters.blocking_connection import BlockingConnection +from pika.spec import BasicProperties + +_DEFAULT_PORT = 5672 + + +def create_connection(config: Mapping[str, Any]) -> BlockingConnection: + host = config.get("host") + port = config.get("port") or _DEFAULT_PORT + username = config.get("username") + password = config.get("password") + virtual_host = config.get("virtual_host", "") + ssl_enabled = config.get("ssl", False) + amqp_protocol = "amqp" + host_url = host + if ssl_enabled: + amqp_protocol = "amqps" + if port: + host_url = host + ":" + str(port) + credentials = f"{username}:{password}@" if username and password else "" + params = pika.URLParameters(f"{amqp_protocol}://{credentials}{host_url}/{virtual_host}") + return BlockingConnection(params) + + +class DestinationRabbitmq(Destination): + def write( + self, config: Mapping[str, Any], configured_catalog: ConfiguredAirbyteCatalog, input_messages: Iterable[AirbyteMessage] + ) -> Iterable[AirbyteMessage]: + exchange = config.get("exchange") + routing_key = config["routing_key"] + connection = create_connection(config=config) + channel = connection.channel() + + streams = {s.stream.name for s in configured_catalog.streams} + try: + for message in input_messages: + if message.type == Type.STATE: + # Emitting a state message means all records that came before it + # have already been published. + yield message + elif message.type == Type.RECORD: + record = message.record + if record.stream not in streams: + # Message contains record from a stream that is not in the catalog. Skip it! + continue + headers = {"stream": record.stream, "emitted_at": record.emitted_at, "namespace": record.namespace} + properties = BasicProperties(content_type="application/json", headers=headers) + channel.basic_publish( + exchange=exchange or "", routing_key=routing_key, properties=properties, body=json.dumps(record.data) + ) + else: + # Let's ignore other message types for now + continue + finally: + connection.close() + + def check(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> AirbyteConnectionStatus: + try: + connection = create_connection(config=config) + except Exception as e: + logger.error(f"Failed to create connection. Error: {e}") + return AirbyteConnectionStatus(status=Status.FAILED, message=f"Could not create connection: {repr(e)}") + try: + channel = connection.channel() + if channel.is_open: + return AirbyteConnectionStatus(status=Status.SUCCEEDED) + return AirbyteConnectionStatus(status=Status.FAILED, message="Could not open channel") + except Exception as e: + logger.error(f"Failed to open RabbitMQ channel. 
Error: {e}") + return AirbyteConnectionStatus(status=Status.FAILED, message=f"An exception occurred: {repr(e)}") + finally: + connection.close() diff --git a/airbyte-integrations/connectors/destination-rabbitmq/destination_rabbitmq/spec.json b/airbyte-integrations/connectors/destination-rabbitmq/destination_rabbitmq/spec.json new file mode 100644 index 0000000..cbeb330 --- /dev/null +++ b/airbyte-integrations/connectors/destination-rabbitmq/destination_rabbitmq/spec.json @@ -0,0 +1,49 @@ +{ + "documentationUrl": "https://docs.airbyte.com/integrations/destinations/rabbitmq", + "supported_destination_sync_modes": ["append"], + "supportsIncremental": true, + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Destination Rabbitmq", + "type": "object", + "required": ["host", "routing_key"], + "additionalProperties": false, + "properties": { + "ssl": { + "type": "boolean", + "description": "SSL enabled.", + "default": true + }, + "host": { + "type": "string", + "description": "The RabbitMQ host name." + }, + "port": { + "type": "integer", + "description": "The RabbitMQ port." + }, + "virtual_host": { + "type": "string", + "description": "The RabbitMQ virtual host name." + }, + "username": { + "type": "string", + "description": "The username to connect." + }, + "password": { + "type": "string", + "title": "Password", + "description": "The password to connect.", + "airbyte_secret": true + }, + "exchange": { + "type": "string", + "description": "The exchange name." + }, + "routing_key": { + "type": "string", + "description": "The routing key." + } + } + } +} diff --git a/airbyte-integrations/connectors/destination-rabbitmq/icon.svg b/airbyte-integrations/connectors/destination-rabbitmq/icon.svg new file mode 100644 index 0000000..dacd88f --- /dev/null +++ b/airbyte-integrations/connectors/destination-rabbitmq/icon.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/airbyte-integrations/connectors/destination-rabbitmq/integration_tests/integration_test.py b/airbyte-integrations/connectors/destination-rabbitmq/integration_tests/integration_test.py new file mode 100644 index 0000000..f99c641 --- /dev/null +++ b/airbyte-integrations/connectors/destination-rabbitmq/integration_tests/integration_test.py @@ -0,0 +1,90 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+# + +import json +from unittest.mock import Mock + +from airbyte_cdk.models import AirbyteMessage, Status, Type +from airbyte_cdk.models.airbyte_protocol import ( + AirbyteRecordMessage, + AirbyteStateMessage, + AirbyteStream, + ConfiguredAirbyteCatalog, + ConfiguredAirbyteStream, + DestinationSyncMode, + SyncMode, +) +from destination_rabbitmq.destination import DestinationRabbitmq, create_connection + +TEST_STREAM = "animals" +TEST_NAMESPACE = "test_namespace" +TEST_MESSAGE = {"name": "cat"} + + +def _configured_catalog() -> ConfiguredAirbyteCatalog: + stream_schema = {"type": "object", "properties": {"name": {"type": "string"}}} + append_stream = ConfiguredAirbyteStream( + stream=AirbyteStream(name=TEST_STREAM, json_schema=stream_schema, supported_sync_modes=[SyncMode.incremental]), + sync_mode=SyncMode.incremental, + destination_sync_mode=DestinationSyncMode.append, + ) + return ConfiguredAirbyteCatalog(streams=[append_stream]) + + +def consume(config): + connection = create_connection(config=config) + channel = connection.channel() + + def assert_message(ch, method, properties, body): + assert json.loads(body) == TEST_MESSAGE + assert properties.content_type == "application/json" + assert properties.headers["stream"] == TEST_STREAM + assert properties.headers["namespace"] == TEST_NAMESPACE + assert "emitted_at" in properties.headers + channel.stop_consuming() + + channel.basic_consume(queue=config["routing_key"], on_message_callback=assert_message, auto_ack=True) + channel.start_consuming() + + +def _state() -> AirbyteMessage: + return AirbyteMessage(type=Type.STATE, state=AirbyteStateMessage(data={})) + + +def _record() -> AirbyteMessage: + return AirbyteMessage( + type=Type.RECORD, record=AirbyteRecordMessage(stream=TEST_STREAM, data=TEST_MESSAGE, emitted_at=0, namespace=TEST_NAMESPACE) + ) + + +def test_check_fails(): + f = open( + "integration_tests/invalid_config.json", + ) + config = json.load(f) + destination = DestinationRabbitmq() + status = destination.check(logger=Mock(), config=config) + assert status.status == Status.FAILED + + +def test_check_succeeds(): + f = open( + "secrets/config.json", + ) + config = json.load(f) + destination = DestinationRabbitmq() + status = destination.check(logger=Mock(), config=config) + assert status.status == Status.SUCCEEDED + + +def test_write(): + f = open( + "secrets/config.json", + ) + config = json.load(f) + messages = [_record(), _state()] + destination = DestinationRabbitmq() + for m in destination.write(config=config, configured_catalog=_configured_catalog(), input_messages=messages): + assert m.type == Type.STATE + consume(config) diff --git a/airbyte-integrations/connectors/destination-rabbitmq/integration_tests/invalid_config.json b/airbyte-integrations/connectors/destination-rabbitmq/integration_tests/invalid_config.json new file mode 100644 index 0000000..a482e47 --- /dev/null +++ b/airbyte-integrations/connectors/destination-rabbitmq/integration_tests/invalid_config.json @@ -0,0 +1,9 @@ +{ + "host": "invalid.host.io", + "port": 5672, + "virtual_host": "invalid_vh", + "username": "invalid_username", + "password": "invalid_password", + "routing_key": "test_queue", + "exchange": "test_exchange" +} diff --git a/airbyte-integrations/connectors/destination-rabbitmq/main.py b/airbyte-integrations/connectors/destination-rabbitmq/main.py new file mode 100644 index 0000000..fc09374 --- /dev/null +++ b/airbyte-integrations/connectors/destination-rabbitmq/main.py @@ -0,0 +1,11 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights 
reserved. +# + + +import sys + +from destination_rabbitmq import DestinationRabbitmq + +if __name__ == "__main__": + DestinationRabbitmq().run(sys.argv[1:]) diff --git a/airbyte-integrations/connectors/destination-rabbitmq/metadata.yaml b/airbyte-integrations/connectors/destination-rabbitmq/metadata.yaml new file mode 100644 index 0000000..56bd877 --- /dev/null +++ b/airbyte-integrations/connectors/destination-rabbitmq/metadata.yaml @@ -0,0 +1,24 @@ +data: + connectorSubtype: database + connectorType: destination + definitionId: e06ad785-ad6f-4647-b2e8-3027a5c59454 + dockerImageTag: 0.1.1 + dockerRepository: airbyte/destination-rabbitmq + githubIssueLabel: destination-rabbitmq + icon: pulsar.svg + license: MIT + name: RabbitMQ + registries: + cloud: + enabled: false + oss: + enabled: false + releaseStage: alpha + documentationUrl: https://docs.airbyte.com/integrations/destinations/rabbitmq + tags: + - language:python + ab_internal: + sl: 100 + ql: 100 + supportLevel: archived +metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/destination-rabbitmq/requirements.txt b/airbyte-integrations/connectors/destination-rabbitmq/requirements.txt new file mode 100644 index 0000000..d6e1198 --- /dev/null +++ b/airbyte-integrations/connectors/destination-rabbitmq/requirements.txt @@ -0,0 +1 @@ +-e . diff --git a/airbyte-integrations/connectors/destination-kvdb/setup.py b/airbyte-integrations/connectors/destination-rabbitmq/setup.py similarity index 72% rename from airbyte-integrations/connectors/destination-kvdb/setup.py rename to airbyte-integrations/connectors/destination-rabbitmq/setup.py index dab5520..352ded5 100644 --- a/airbyte-integrations/connectors/destination-kvdb/setup.py +++ b/airbyte-integrations/connectors/destination-rabbitmq/setup.py @@ -5,16 +5,13 @@ from setuptools import find_packages, setup -MAIN_REQUIREMENTS = [ - "airbyte-cdk", - "requests", -] +MAIN_REQUIREMENTS = ["airbyte-cdk", "pika>=1.1.0"] TEST_REQUIREMENTS = ["pytest~=6.1"] setup( - name="destination_kvdb", - description="Destination implementation for Kvdb.", + name="destination_rabbitmq", + description="Destination implementation for Rabbitmq.", author="Airbyte", author_email="contact@airbyte.io", packages=find_packages(), diff --git a/airbyte-integrations/connectors/destination-rabbitmq/unit_tests/unit_test.py b/airbyte-integrations/connectors/destination-rabbitmq/unit_tests/unit_test.py new file mode 100644 index 0000000..57c34b6 --- /dev/null +++ b/airbyte-integrations/connectors/destination-rabbitmq/unit_tests/unit_test.py @@ -0,0 +1,130 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
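+#
+# NOTE: these unit tests patch destination_rabbitmq.destination.BlockingConnection,
+# so they run without a live RabbitMQ broker.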
+# + +import json +from typing import Any, Dict +from unittest import mock +from unittest.mock import Mock + +from airbyte_cdk.models import AirbyteMessage, Status, Type +from airbyte_cdk.models.airbyte_protocol import ( + AirbyteRecordMessage, + AirbyteStateMessage, + AirbyteStream, + ConfiguredAirbyteCatalog, + ConfiguredAirbyteStream, + DestinationSyncMode, + SyncMode, +) +from destination_rabbitmq.destination import DestinationRabbitmq +from pika.spec import Queue + +config = { + "host": "test.rabbitmq", + "port": 5672, + "virtual_host": "test_vh", + "username": "john.doe", + "password": "secret", + "exchange": "test_exchange", + "routing_key": "test_routing_key", +} + + +def _init_mocks(connection_init): + connection, channel = Mock(), Mock() + connection_init.return_value = connection + connection.channel.return_value = channel + return channel + + +@mock.patch("destination_rabbitmq.destination.BlockingConnection") +def test_check_succeeds(connection_init): + result = Mock() + result.method = Queue.DeclareOk() + channel = _init_mocks(connection_init=connection_init) + channel.queue_declare.return_value = result + destination = DestinationRabbitmq() + status = destination.check(logger=Mock(), config=config) + assert status.status == Status.SUCCEEDED + + +@mock.patch("destination_rabbitmq.destination.BlockingConnection") +def test_check_fails_on_getting_channel(connection_init): + connection = Mock() + connection_init.return_value = connection + connection.channel.side_effect = Exception("Failed to get channel") + destination = DestinationRabbitmq() + status = destination.check(logger=Mock(), config=config) + assert status.status == Status.FAILED + + +@mock.patch("destination_rabbitmq.destination.BlockingConnection") +def test_check_fails_on_creating_connection(connection_init): + connection_init.side_effect = Exception("Could not open connection") + destination = DestinationRabbitmq() + status = destination.check(logger=Mock(), config=config) + assert status.status == Status.FAILED + + +def _state() -> AirbyteMessage: + return AirbyteMessage(type=Type.STATE, state=AirbyteStateMessage(data={})) + + +def _record(stream: str, data: Dict[str, Any]) -> AirbyteMessage: + return AirbyteMessage(type=Type.RECORD, record=AirbyteRecordMessage(stream=stream, data=data, emitted_at=0)) + + +def _configured_catalog() -> ConfiguredAirbyteCatalog: + stream_schema = {"type": "object", "properties": {"name": {"type": "string"}, "email": {"type": "string"}}} + append_stream = ConfiguredAirbyteStream( + stream=AirbyteStream(name="people", json_schema=stream_schema, supported_sync_modes=[SyncMode.incremental]), + sync_mode=SyncMode.incremental, + destination_sync_mode=DestinationSyncMode.append, + ) + return ConfiguredAirbyteCatalog(streams=[append_stream]) + + +@mock.patch("destination_rabbitmq.destination.BlockingConnection") +def test_write_succeeds(connection_init): + stream = "people" + data = {"name": "John Doe", "email": "john.doe@example.com"} + channel = _init_mocks(connection_init=connection_init) + input_messages = [_record(stream=stream, data=data), _state()] + destination = DestinationRabbitmq() + for m in destination.write(config=config, configured_catalog=_configured_catalog(), input_messages=input_messages): + assert m.type == Type.STATE + _, _, args = channel.basic_publish.mock_calls[0] + assert args["exchange"] == "test_exchange" + assert args["routing_key"] == "test_routing_key" + assert args["properties"].content_type == "application/json" + assert args["properties"].headers["stream"] 
== stream + assert json.loads(args["body"]) == data + + +@mock.patch("destination_rabbitmq.destination.BlockingConnection") +def test_write_succeeds_with_direct_exchange(connection_init): + stream = "people" + data = {"name": "John Doe", "email": "john.doe@example.com"} + channel = _init_mocks(connection_init=connection_init) + input_messages = [_record(stream=stream, data=data), _state()] + custom_config = dict(config) + del custom_config["exchange"] + destination = DestinationRabbitmq() + for m in destination.write(config=custom_config, configured_catalog=_configured_catalog(), input_messages=input_messages): + assert m.type == Type.STATE + _, _, args = channel.basic_publish.mock_calls[0] + assert args["exchange"] == "" + assert json.loads(args["body"]) == data + + +@mock.patch("destination_rabbitmq.destination.BlockingConnection") +def test_write_skips_message_from_unknown_stream(connection_init): + stream = "shapes" + data = {"name": "Rectangle", "color": "blue"} + channel = _init_mocks(connection_init=connection_init) + input_messages = [_record(stream=stream, data=data), _state()] + destination = DestinationRabbitmq() + for m in destination.write(config=config, configured_catalog=_configured_catalog(), input_messages=input_messages): + assert m.type == Type.STATE + channel.basic_publish.assert_not_called() diff --git a/airbyte-integrations/connectors/destination-redpanda/README.md b/airbyte-integrations/connectors/destination-redpanda/README.md new file mode 100644 index 0000000..6f9f022 --- /dev/null +++ b/airbyte-integrations/connectors/destination-redpanda/README.md @@ -0,0 +1,72 @@ +# Destination Redpanda + +This is the repository for the Redpanda destination connector in Java. +For information about how to use this connector within Airbyte, see [the User Documentation](https://docs.airbyte.io/integrations/destinations/redpanda). + +## Local development + +#### Building via Gradle +From the Airbyte repository root, run: +``` +./gradlew :airbyte-integrations:connectors:destination-redpanda:build +``` + +#### Create credentials +**If you are a community contributor**, generate the necessary credentials and place them in `secrets/config.json` conforming to the spec file in `src/main/resources/spec.json`. +Note that the `secrets` directory is git-ignored by default, so there is no danger of accidentally checking in sensitive information. + +**If you are an Airbyte core member**, follow the [instructions](https://docs.airbyte.io/connector-development#using-credentials-in-ci) to set up the credentials. + +### Locally running the connector docker image + +#### Build +Build the connector image via Gradle: + +``` +./gradlew :airbyte-integrations:connectors:destination-redpanda:buildConnectorImage +``` +Once built, the docker image name and tag on your host will be `airbyte/destination-redpanda:dev`. + +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/destination-redpanda:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-redpanda:dev check --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-redpanda:dev discover --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/destination-redpanda:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json +``` + +## Testing +We use `JUnit` for Java tests. 
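+
+A sample `secrets/config.json` for the docker commands above that take `--config` might look like the sketch below; the values are illustrative only and `src/main/resources/spec.json` is the authoritative schema:
+```
+{
+  "bootstrap_servers": "localhost:9092",
+  "buffer_memory": "33554432",
+  "compression_type": "none",
+  "retries": 5,
+  "batch_size": 16384,
+  "topic_num_partitions": 1,
+  "topic_replication_factor": 1,
+  "socket_connection_setup_timeout_ms": 10000,
+  "socket_connection_setup_timeout_max_ms": 30000
+}
+```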
+ +### Unit and Integration Tests +Place unit tests under `src/test/java/io/airbyte/integrations/destination/redpanda`. + +#### Acceptance Tests +Airbyte has a standard test suite that all destination connectors must pass. The suite for this connector is implemented in +`src/test-integration/java/io/airbyte/integrations/destination/redpanda/RedpandaDestinationAcceptanceTest.java`. + +### Using gradle to run tests +All commands should be run from airbyte project root. +To run unit tests: +``` +./gradlew :airbyte-integrations:connectors:destination-redpanda:unitTest +``` +To run acceptance and custom integration tests: +``` +./gradlew :airbyte-integrations:connectors:destination-redpanda:integrationTest +``` + +## Dependency Management + +### Publishing a new version of the connector +You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what? +1. Make sure your changes are passing our test suite: `airbyte-ci connectors --name=destination-redpanda test` +2. Bump the connector version in `metadata.yaml`: increment the `dockerImageTag` value. Please follow [semantic versioning for connectors](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#semantic-versioning-for-connectors). +3. Make sure the `metadata.yaml` content is up to date. +4. Make sure the connector documentation and its changelog are up to date (`docs/integrations/destinations/redpanda.md`). +5. Create a Pull Request: use [our PR naming conventions](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#pull-request-title-convention). +6. Pat yourself on the back for being an awesome contributor. +7. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master. + diff --git a/airbyte-integrations/connectors/destination-redpanda/build.gradle b/airbyte-integrations/connectors/destination-redpanda/build.gradle new file mode 100644 index 0000000..a79982f --- /dev/null +++ b/airbyte-integrations/connectors/destination-redpanda/build.gradle @@ -0,0 +1,32 @@ +plugins { + id 'application' + id 'airbyte-java-connector' +} + +airbyteJavaConnector { + cdkVersionRequired = '0.2.0' + features = ['db-destinations'] + useLocalCdk = false +} + +//remove once upgrading the CDK version to 0.4.x or later +java { + compileJava { + options.compilerArgs.remove("-Werror") + } +} + +airbyteJavaConnector.addCdkDependencies() + +application { + mainClass = 'io.airbyte.integrations.destination.redpanda.RedpandaDestination' +} + +dependencies { + + // https://mvnrepository.com/artifact/org.apache.kafka/kafka-clients + implementation 'org.apache.kafka:kafka-clients:3.3.1' + implementation 'org.apache.kafka:connect-json:3.3.1' + + testImplementation "org.testcontainers:redpanda:1.17.5" +} diff --git a/airbyte-integrations/connectors/destination-redpanda/icon.svg b/airbyte-integrations/connectors/destination-redpanda/icon.svg new file mode 100644 index 0000000..45b4da3 --- /dev/null +++ b/airbyte-integrations/connectors/destination-redpanda/icon.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/airbyte-integrations/connectors/destination-redpanda/metadata.yaml b/airbyte-integrations/connectors/destination-redpanda/metadata.yaml new file mode 100644 index 0000000..eb046e0 --- /dev/null +++ b/airbyte-integrations/connectors/destination-redpanda/metadata.yaml @@ -0,0 +1,24 @@ +data: + connectorSubtype: database + connectorType: destination + definitionId: 825c5ee3-ed9a-4dd1-a2b6-79ed722f7b13 + dockerImageTag: 0.1.0 + 
dockerRepository: airbyte/destination-redpanda + githubIssueLabel: destination-redpanda + icon: redpanda.svg + license: MIT + name: Redpanda + registries: + cloud: + enabled: false + oss: + enabled: false + releaseStage: alpha + documentationUrl: https://docs.airbyte.com/integrations/destinations/redpanda + tags: + - language:java + ab_internal: + sl: 100 + ql: 100 + supportLevel: archived +metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/destination-redpanda/src/main/java/io/airbyte/integrations/destination/redpanda/RedpandaConfig.java b/airbyte-integrations/connectors/destination-redpanda/src/main/java/io/airbyte/integrations/destination/redpanda/RedpandaConfig.java new file mode 100644 index 0000000..b8ecbc0 --- /dev/null +++ b/airbyte-integrations/connectors/destination-redpanda/src/main/java/io/airbyte/integrations/destination/redpanda/RedpandaConfig.java @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.redpanda; + +import com.fasterxml.jackson.databind.JsonNode; +import java.util.Map; +import java.util.Optional; +import org.apache.kafka.clients.admin.Admin; +import org.apache.kafka.clients.admin.AdminClient; +import org.apache.kafka.clients.admin.AdminClientConfig; +import org.apache.kafka.clients.producer.KafkaProducer; +import org.apache.kafka.clients.producer.ProducerConfig; + +public class RedpandaConfig { + + // host1:port1,host2:port2,... + private final String bootstrapServers; + + private final long bufferMemory; + + private final String compressionType; + + private final int retries; + + private final int batchSize; + + private final Optional topicNumPartitions; + + private final Optional topicReplicationFactor; + + private final int socketConnectionSetupTimeoutMs; + + private final int socketConnectionSetupTimeoutMaxMs; + + private RedpandaConfig(String bootstrapServers, + long bufferMemory, + String compressionType, + int retries, + int batchSize, + Optional topicNumPartitions, + Optional topicReplicationFactor, + int socketConnectionSetupTimeoutMs, + int socketConnectionSetupTimeoutMaxMs) { + this.bootstrapServers = bootstrapServers; + this.bufferMemory = bufferMemory; + this.compressionType = compressionType; + this.retries = retries; + this.batchSize = batchSize; + this.topicNumPartitions = topicNumPartitions; + this.topicReplicationFactor = topicReplicationFactor; + this.socketConnectionSetupTimeoutMs = socketConnectionSetupTimeoutMs; + this.socketConnectionSetupTimeoutMaxMs = socketConnectionSetupTimeoutMaxMs; + } + + public static RedpandaConfig createConfig(JsonNode jsonConfig) { + return new RedpandaConfig( + jsonConfig.get("bootstrap_servers").asText(), + jsonConfig.get("buffer_memory").asLong(33554432L), + jsonConfig.get("compression_type").asText("none"), + jsonConfig.get("retries").asInt(5), + jsonConfig.get("batch_size").asInt(16384), + Optional.of(jsonConfig.get("topic_num_partitions").asInt(1)), + Optional.of(((Integer) jsonConfig.get("topic_replication_factor").asInt(1)).shortValue()), + jsonConfig.get("socket_connection_setup_timeout_ms").asInt(10000), + jsonConfig.get("socket_connection_setup_timeout_max_ms").asInt(30000)); + } + + public KafkaProducer createKafkaProducer() { + return new KafkaProducer<>(Map.of( + ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers, + ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringSerializer", + ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, 
"org.apache.kafka.connect.json.JsonSerializer", + ProducerConfig.BUFFER_MEMORY_CONFIG, bufferMemory, + ProducerConfig.COMPRESSION_TYPE_CONFIG, compressionType, + ProducerConfig.RETRIES_CONFIG, retries, + ProducerConfig.BATCH_SIZE_CONFIG, batchSize, + ProducerConfig.SOCKET_CONNECTION_SETUP_TIMEOUT_MS_CONFIG, socketConnectionSetupTimeoutMs, + ProducerConfig.SOCKET_CONNECTION_SETUP_TIMEOUT_MAX_MS_CONFIG, socketConnectionSetupTimeoutMaxMs)); + + } + + public Admin createAdminClient() { + return AdminClient.create(Map.of( + AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers, + AdminClientConfig.RETRIES_CONFIG, retries, + AdminClientConfig.SOCKET_CONNECTION_SETUP_TIMEOUT_MS_CONFIG, socketConnectionSetupTimeoutMs, + AdminClientConfig.SOCKET_CONNECTION_SETUP_TIMEOUT_MAX_MS_CONFIG, socketConnectionSetupTimeoutMaxMs)); + } + + public Optional topicNumPartitions() { + return topicNumPartitions; + } + + public Optional topicReplicationFactor() { + return topicReplicationFactor; + } + +} diff --git a/airbyte-integrations/connectors/destination-redpanda/src/main/java/io/airbyte/integrations/destination/redpanda/RedpandaDestination.java b/airbyte-integrations/connectors/destination-redpanda/src/main/java/io/airbyte/integrations/destination/redpanda/RedpandaDestination.java new file mode 100644 index 0000000..93b6e3d --- /dev/null +++ b/airbyte-integrations/connectors/destination-redpanda/src/main/java/io/airbyte/integrations/destination/redpanda/RedpandaDestination.java @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.redpanda; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.cdk.integrations.BaseConnector; +import io.airbyte.cdk.integrations.base.AirbyteMessageConsumer; +import io.airbyte.cdk.integrations.base.Destination; +import io.airbyte.cdk.integrations.base.IntegrationRunner; +import io.airbyte.commons.json.Jsons; +import io.airbyte.protocol.models.v0.AirbyteConnectionStatus; +import io.airbyte.protocol.models.v0.AirbyteMessage; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import java.util.List; +import java.util.Optional; +import java.util.UUID; +import java.util.function.Consumer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class RedpandaDestination extends BaseConnector implements Destination { + + private static final Logger LOGGER = LoggerFactory.getLogger(RedpandaDestination.class); + + public static void main(String[] args) throws Exception { + new IntegrationRunner(new RedpandaDestination()).run(args); + } + + @Override + public AirbyteConnectionStatus check(JsonNode config) { + String topicName = "namespace.stream"; + RedpandaOperations redpandaOperations = null; + try { + RedpandaConfig redpandaConfig = RedpandaConfig.createConfig(config); + redpandaOperations = new RedpandaOperations(redpandaConfig); + redpandaOperations.createTopic( + List.of(new RedpandaOperations.TopicInfo(topicName, Optional.empty(), Optional.empty()))); + redpandaOperations.putRecordBlocking(topicName, UUID.randomUUID().toString(), Jsons.emptyObject()); + redpandaOperations.flush(); + return new AirbyteConnectionStatus().withStatus(AirbyteConnectionStatus.Status.SUCCEEDED); + } catch (Exception e) { + LOGGER.error("Error while trying to connect to Redpanda: ", e); + return new AirbyteConnectionStatus().withStatus(AirbyteConnectionStatus.Status.FAILED); + } finally { + if (redpandaOperations != null) { + try { + 
redpandaOperations.deleteTopic(List.of(topicName)); + } catch (Exception e) { + LOGGER.error("Error while deleting Redpanda topic: ", e); + } + redpandaOperations.close(); + } + } + } + + @Override + public AirbyteMessageConsumer getConsumer(JsonNode config, + ConfiguredAirbyteCatalog configuredCatalog, + Consumer outputRecordCollector) { + RedpandaConfig redpandaConfig = RedpandaConfig.createConfig(config); + return new RedpandaMessageConsumer(configuredCatalog, new RedpandaOperations(redpandaConfig), redpandaConfig, + outputRecordCollector); + } + +} diff --git a/airbyte-integrations/connectors/destination-redpanda/src/main/java/io/airbyte/integrations/destination/redpanda/RedpandaMessageConsumer.java b/airbyte-integrations/connectors/destination-redpanda/src/main/java/io/airbyte/integrations/destination/redpanda/RedpandaMessageConsumer.java new file mode 100644 index 0000000..4be72a2 --- /dev/null +++ b/airbyte-integrations/connectors/destination-redpanda/src/main/java/io/airbyte/integrations/destination/redpanda/RedpandaMessageConsumer.java @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.redpanda; + +import static io.airbyte.cdk.integrations.base.JavaBaseConstants.COLUMN_NAME_AB_ID; +import static io.airbyte.cdk.integrations.base.JavaBaseConstants.COLUMN_NAME_DATA; +import static io.airbyte.cdk.integrations.base.JavaBaseConstants.COLUMN_NAME_EMITTED_AT; + +import io.airbyte.cdk.integrations.base.FailureTrackingAirbyteMessageConsumer; +import io.airbyte.commons.json.Jsons; +import io.airbyte.protocol.models.v0.AirbyteMessage; +import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import java.time.Instant; +import java.util.Map; +import java.util.UUID; +import java.util.function.Consumer; +import java.util.stream.Collectors; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class RedpandaMessageConsumer extends FailureTrackingAirbyteMessageConsumer { + + private static final Logger LOGGER = LoggerFactory.getLogger(RedpandaMessageConsumer.class); + + private final Consumer outputRecordCollector; + + private final RedpandaOperations redpandaOperations; + + private final RedpandaConfig redpandaConfig; + + private final Map redpandaWriteConfigs; + + public RedpandaMessageConsumer(ConfiguredAirbyteCatalog configuredCatalog, + RedpandaOperations redpandaOperations, + RedpandaConfig redpandaConfig, + Consumer outputRecordCollector) { + this.outputRecordCollector = outputRecordCollector; + this.redpandaOperations = redpandaOperations; + this.redpandaConfig = redpandaConfig; + this.redpandaWriteConfigs = configuredCatalog.getStreams().stream() + .collect( + Collectors.toUnmodifiableMap(AirbyteStreamNameNamespacePair::fromConfiguredAirbyteSteam, + str -> new RedpandaWriteConfig( + new RedpandaNameTransformer().topicName(str.getStream().getNamespace(), + str.getStream().getName()), + str.getDestinationSyncMode()))); + } + + @Override + protected void startTracked() { + redpandaOperations.createTopic(redpandaWriteConfigs.values().stream() + .map(wc -> new RedpandaOperations.TopicInfo(wc.topicName(), redpandaConfig.topicNumPartitions(), + redpandaConfig.topicReplicationFactor())) + .collect(Collectors.toSet())); + } + + @Override + protected void acceptTracked(AirbyteMessage message) { + if (message.getType() == AirbyteMessage.Type.RECORD) { + var messageRecord = message.getRecord(); + + var streamConfig = + 
redpandaWriteConfigs.get(AirbyteStreamNameNamespacePair.fromRecordMessage(messageRecord)); + + if (streamConfig == null) { + throw new IllegalArgumentException("Unrecognized destination stream"); + } + + String key = UUID.randomUUID().toString(); + + var data = Jsons.jsonNode(Map.of( + COLUMN_NAME_AB_ID, key, + COLUMN_NAME_DATA, messageRecord.getData(), + COLUMN_NAME_EMITTED_AT, Instant.now())); + + var topic = streamConfig.topicName(); + + redpandaOperations.putRecord(topic, key, data, e -> { + LOGGER.error("Error while sending record to Redpanda with reason ", e); + try { + throw e; + } catch (Exception ex) { + throw new RuntimeException(ex); + } + }); + } else if (message.getType() == AirbyteMessage.Type.STATE) { + outputRecordCollector.accept(message); + } else { + LOGGER.warn("Unsupported airbyte message type: {}", message.getType()); + } + } + + @Override + protected void close(boolean hasFailed) { + redpandaOperations.close(); + } + +} diff --git a/airbyte-integrations/connectors/destination-redpanda/src/main/java/io/airbyte/integrations/destination/redpanda/RedpandaNameTransformer.java b/airbyte-integrations/connectors/destination-redpanda/src/main/java/io/airbyte/integrations/destination/redpanda/RedpandaNameTransformer.java new file mode 100644 index 0000000..b368a4a --- /dev/null +++ b/airbyte-integrations/connectors/destination-redpanda/src/main/java/io/airbyte/integrations/destination/redpanda/RedpandaNameTransformer.java @@ -0,0 +1,19 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.redpanda; + +import io.airbyte.cdk.integrations.destination.StandardNameTransformer; + +public class RedpandaNameTransformer extends StandardNameTransformer { + + String topicName(String namespace, String stream) { + namespace = namespace != null ? namespace : ""; + var streamName = namespace + "_" + stream; + streamName = super.convertStreamName(streamName); + // max char length for redpanda topic name is 255 + return streamName.length() > 255 ? streamName.substring(0, 255) : streamName; + } + +} diff --git a/airbyte-integrations/connectors/destination-redpanda/src/main/java/io/airbyte/integrations/destination/redpanda/RedpandaOperations.java b/airbyte-integrations/connectors/destination-redpanda/src/main/java/io/airbyte/integrations/destination/redpanda/RedpandaOperations.java new file mode 100644 index 0000000..c772e1e --- /dev/null +++ b/airbyte-integrations/connectors/destination-redpanda/src/main/java/io/airbyte/integrations/destination/redpanda/RedpandaOperations.java @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
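+ *
+ * Thin wrapper around the Kafka Admin and Producer clients used by this destination:
+ * it creates, lists and deletes topics and publishes records, including a blocking
+ * variant used by the connection check.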
+ */ + +package io.airbyte.integrations.destination.redpanda; + +import com.fasterxml.jackson.databind.JsonNode; +import java.io.Closeable; +import java.util.Collection; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Future; +import java.util.function.Consumer; +import java.util.function.Function; +import java.util.function.Supplier; +import java.util.stream.Collectors; +import org.apache.kafka.clients.admin.Admin; +import org.apache.kafka.clients.admin.NewTopic; +import org.apache.kafka.clients.producer.KafkaProducer; +import org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.kafka.common.errors.TopicExistsException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class RedpandaOperations implements Closeable { + + private static final Logger LOGGER = LoggerFactory.getLogger(RedpandaOperations.class); + + private final Admin adminClient; + + private final KafkaProducer kafkaProducer; + + public RedpandaOperations(RedpandaConfig redpandaConfig) { + this.adminClient = redpandaConfig.createAdminClient(); + this.kafkaProducer = redpandaConfig.createKafkaProducer(); + } + + public void createTopic(Collection topics) { + var newTopics = topics.stream() + .map(tf -> new NewTopic(tf.name(), tf.numPartitions(), tf.replicationFactor())) + .collect(Collectors.toSet()); + + var createTopicsResult = adminClient.createTopics(newTopics); + + // we need to wait for results since data replication is directly dependent on topic creation + + createTopicsResult.values().values().forEach(f -> { + try { + syncWrapper(() -> f); + } catch (ExecutionException e) { + // errors related to already existing topics should be ignored + if (!(e.getCause() instanceof TopicExistsException)) { + throw new RuntimeException(e); + } + } + }); + } + + public void deleteTopic(Collection topics) { + + var deleteTopicsResult = adminClient.deleteTopics(topics); + + try { + syncWrapper(deleteTopicsResult::all); + } catch (ExecutionException e) { + throw new RuntimeException(e); + } + } + + public Set listTopics() { + + var listTopics = adminClient.listTopics(); + + try { + return syncWrapper(listTopics::names); + } catch (ExecutionException e) { + throw new RuntimeException(e); + } + + } + + public void putRecord(String topic, String key, JsonNode data, Consumer consumer) { + var producerRecord = new ProducerRecord<>(topic, key, data); + + kafkaProducer.send(producerRecord, ((metadata, exception) -> { + if (exception != null) { + consumer.accept(exception); + } + })); + + } + + // used when testing write permissions on check + public void putRecordBlocking(String topic, String key, JsonNode data) { + + var producerRecord = new ProducerRecord<>(topic, key, data); + + try { + syncWrapper(kafkaProducer::send, producerRecord); + } catch (ExecutionException e) { + throw new RuntimeException(e); + } + } + + public void flush() { + kafkaProducer.flush(); + } + + private T syncWrapper(Supplier> asyncFunction) throws ExecutionException { + try { + return asyncFunction.get().get(); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new RuntimeException(e); + } + } + + private T syncWrapper(Function, Future> asyncFunction, + ProducerRecord producerRecord) + throws ExecutionException { + return syncWrapper(() -> asyncFunction.apply(producerRecord)); + } + + public record TopicInfo( + + String name, + + Optional numPartitions, + + Optional replicationFactor + + ) { + + } + + @Override + public 
void close() { + kafkaProducer.flush(); + kafkaProducer.close(); + adminClient.close(); + } + +} diff --git a/airbyte-integrations/connectors/destination-redpanda/src/main/java/io/airbyte/integrations/destination/redpanda/RedpandaWriteConfig.java b/airbyte-integrations/connectors/destination-redpanda/src/main/java/io/airbyte/integrations/destination/redpanda/RedpandaWriteConfig.java new file mode 100644 index 0000000..9af6557 --- /dev/null +++ b/airbyte-integrations/connectors/destination-redpanda/src/main/java/io/airbyte/integrations/destination/redpanda/RedpandaWriteConfig.java @@ -0,0 +1,15 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.redpanda; + +import io.airbyte.protocol.models.v0.DestinationSyncMode; + +public record RedpandaWriteConfig( + + String topicName, + + DestinationSyncMode destinationSyncMode + +) {} diff --git a/airbyte-integrations/connectors/destination-redpanda/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-redpanda/src/main/resources/spec.json new file mode 100644 index 0000000..89e41c6 --- /dev/null +++ b/airbyte-integrations/connectors/destination-redpanda/src/main/resources/spec.json @@ -0,0 +1,76 @@ +{ + "documentationUrl": "https://docs.airbyte.com/integrations/destinations/redpanda", + "supportsIncremental": true, + "supportsNormalization": false, + "supportsDBT": false, + "supported_destination_sync_modes": ["append"], + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Redpanda destination connector", + "type": "object", + "required": [ + "bootstrap_servers", + "buffer_memory", + "compression_type", + "retries", + "batch_size" + ], + "properties": { + "bootstrap_servers": { + "title": "Bootstrap Servers", + "description": "A list of host/port pairs to use for establishing the initial connection to the Redpanda cluster. The client will make use of all servers irrespective of which servers are specified here for bootstrapping—this list only impacts the initial hosts used to discover the full set of servers. This list should be in the form host1:port1,host2:port2,.... 
Since these servers are just used for the initial connection to discover the full cluster membership (which may change dynamically), this list need not contain the full set of servers (you may want more than one, though, in case a server is down).", + "type": "string", + "examples": ["redpanda-broker1:9092,redpanda-broker2:9092"] + }, + "buffer_memory": { + "title": "Buffer Memory", + "description": "The total bytes of memory the producer can use to buffer records waiting to be sent to the server.", + "type": "string", + "examples": 33554432 + }, + "compression_type": { + "title": "Compression Type", + "description": "The compression type for all data generated by the producer.", + "type": "string", + "default": "none", + "enum": ["none", "gzip", "snappy", "lz4", "zstd"] + }, + "batch_size": { + "title": "Batch Size", + "description": "The producer will attempt to batch records together into fewer requests whenever multiple records are being sent to the same partition.", + "type": "integer", + "examples": [16384] + }, + "retries": { + "title": "Retries", + "description": "Setting a value greater than zero will cause the client to resend any record whose send fails with a potentially transient error.", + "type": "integer", + "examples": [2147483647] + }, + "topic_num_partitions": { + "title": "Number of topic partitions", + "description": "The number of topic partitions which will be created on topic creation", + "type": "integer", + "examples": [10] + }, + "topic_replication_factor": { + "title": "Topic replication factor", + "description": "The number of topics to which messages will be replicated", + "type": "integer", + "examples": [10] + }, + "socket_connection_setup_timeout_ms": { + "title": "Socket Connection Setup Timeout", + "description": "The amount of time the client will wait for the socket connection to be established.", + "type": "integer", + "examples": [10000] + }, + "socket_connection_setup_timeout_max_ms": { + "title": "Socket Connection Setup Max Timeout", + "description": "The maximum amount of time the client will wait for the socket connection to be established. The connection setup timeout will increase exponentially for each consecutive connection failure up to this maximum.", + "type": "integer", + "examples": [30000] + } + } + } +} diff --git a/airbyte-integrations/connectors/destination-redpanda/src/test-integration/java/io/airbyte/integrations/destination/redpanda/RedpandaConsumer.java b/airbyte-integrations/connectors/destination-redpanda/src/test-integration/java/io/airbyte/integrations/destination/redpanda/RedpandaConsumer.java new file mode 100644 index 0000000..fc5b22b --- /dev/null +++ b/airbyte-integrations/connectors/destination-redpanda/src/test-integration/java/io/airbyte/integrations/destination/redpanda/RedpandaConsumer.java @@ -0,0 +1,16 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
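+ *
+ * Test helper: a thin KafkaConsumer subclass used by the integration tests to read
+ * records back from Redpanda topics.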
+ */ + +package io.airbyte.integrations.destination.redpanda; + +import java.util.Map; +import org.apache.kafka.clients.consumer.KafkaConsumer; + +public class RedpandaConsumer extends KafkaConsumer { + + public RedpandaConsumer(Map configs) { + super(configs); + } + +} diff --git a/airbyte-integrations/connectors/destination-redpanda/src/test-integration/java/io/airbyte/integrations/destination/redpanda/RedpandaConsumerFactory.java b/airbyte-integrations/connectors/destination-redpanda/src/test-integration/java/io/airbyte/integrations/destination/redpanda/RedpandaConsumerFactory.java new file mode 100644 index 0000000..d5b0cad --- /dev/null +++ b/airbyte-integrations/connectors/destination-redpanda/src/test-integration/java/io/airbyte/integrations/destination/redpanda/RedpandaConsumerFactory.java @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.redpanda; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import java.util.Map; +import org.apache.kafka.clients.consumer.ConsumerConfig; + +public class RedpandaConsumerFactory { + + private RedpandaConsumerFactory() { + + } + + public static RedpandaConsumer getInstance(String bootstrapServers, String groupId) { + Map props = ImmutableMap.builder() + .put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers) + .put(ConsumerConfig.GROUP_ID_CONFIG, groupId) + .put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest") + .put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer") + .put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.connect.json.JsonDeserializer") + .build(); + + return new RedpandaConsumer<>(props); + } + +} diff --git a/airbyte-integrations/connectors/destination-redpanda/src/test-integration/java/io/airbyte/integrations/destination/redpanda/RedpandaContainerFactory.java b/airbyte-integrations/connectors/destination-redpanda/src/test-integration/java/io/airbyte/integrations/destination/redpanda/RedpandaContainerFactory.java new file mode 100644 index 0000000..66be43e --- /dev/null +++ b/airbyte-integrations/connectors/destination-redpanda/src/test-integration/java/io/airbyte/integrations/destination/redpanda/RedpandaContainerFactory.java @@ -0,0 +1,19 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.redpanda; + +import org.testcontainers.redpanda.RedpandaContainer; + +class RedpandaContainerFactory { + + private RedpandaContainerFactory() { + + } + + public static RedpandaContainer createRedpandaContainer() { + return new RedpandaContainer("docker.redpanda.com/vectorized/redpanda:v22.2.7"); + } + +} diff --git a/airbyte-integrations/connectors/destination-redpanda/src/test-integration/java/io/airbyte/integrations/destination/redpanda/RedpandaDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-redpanda/src/test-integration/java/io/airbyte/integrations/destination/redpanda/RedpandaDestinationAcceptanceTest.java new file mode 100644 index 0000000..9d276c4 --- /dev/null +++ b/airbyte-integrations/connectors/destination-redpanda/src/test-integration/java/io/airbyte/integrations/destination/redpanda/RedpandaDestinationAcceptanceTest.java @@ -0,0 +1,151 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
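+ *
+ * Runs the standard Airbyte destination acceptance test suite against a Redpanda
+ * broker started with Testcontainers.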
+ */ + +package io.airbyte.integrations.destination.redpanda; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import io.airbyte.cdk.integrations.base.JavaBaseConstants; +import io.airbyte.cdk.integrations.standardtest.destination.DestinationAcceptanceTest; +import io.airbyte.cdk.integrations.standardtest.destination.comparator.AdvancedTestDataComparator; +import io.airbyte.cdk.integrations.standardtest.destination.comparator.TestDataComparator; +import io.airbyte.commons.json.Jsons; +import java.time.Duration; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ExecutionException; +import java.util.stream.Collectors; +import org.apache.kafka.clients.admin.Admin; +import org.apache.kafka.clients.admin.AdminClient; +import org.apache.kafka.clients.admin.AdminClientConfig; +import org.apache.kafka.clients.admin.TopicListing; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.testcontainers.redpanda.RedpandaContainer; + +public class RedpandaDestinationAcceptanceTest extends DestinationAcceptanceTest { + + private static final Logger LOGGER = LoggerFactory.getLogger(RedpandaDestinationAcceptanceTest.class); + + private static RedpandaContainer redpandaContainer; + + private RedpandaNameTransformer redpandaNameTransformer; + + private Admin adminClient; + + @BeforeAll + static void initContainer() { + redpandaContainer = RedpandaContainerFactory.createRedpandaContainer(); + redpandaContainer.start(); + } + + @AfterAll + static void stopContainer() { + redpandaContainer.stop(); + redpandaContainer.close(); + } + + @Override + protected void setup(final TestDestinationEnv testEnv, final HashSet TEST_SCHEMAS) { + this.redpandaNameTransformer = new RedpandaNameTransformer(); + this.adminClient = AdminClient.create(Map.of( + AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, redpandaContainer.getBootstrapServers(), + AdminClientConfig.RETRIES_CONFIG, 5, + AdminClientConfig.SOCKET_CONNECTION_SETUP_TIMEOUT_MS_CONFIG, 3000, + AdminClientConfig.SOCKET_CONNECTION_SETUP_TIMEOUT_MAX_MS_CONFIG, 30000)); + } + + @Override + protected void tearDown(final TestDestinationEnv testEnv) throws ExecutionException, InterruptedException { + final var topics = adminClient.listTopics().listings().get().stream() + .filter(tl -> !tl.isInternal()) + .map(TopicListing::name) + .collect(Collectors.toSet()); + + adminClient.deleteTopics(topics); + } + + @Override + protected String getImageName() { + return "airbyte/destination-redpanda:dev"; + } + + @Override + protected JsonNode getConfig() { + return Jsons.jsonNode(ImmutableMap.builder() + .put("bootstrap_servers", redpandaContainer.getBootstrapServers()) + .put("compression_type", "none") + .put("batch_size", 16384) + .put("buffer_memory", "33554432") + .put("retries", 1) + .put("topic_num_partitions", 1) + .put("topic_replication_factor", 1) + .put("socket_connection_setup_timeout_ms", 3000) + .put("socket_connection_setup_timeout_max_ms", 3000) + .build()); + } + + @Override + protected JsonNode getFailCheckConfig() { + return Jsons.jsonNode(ImmutableMap.builder() + .put("bootstrap_servers", "127.0.0.9") + .put("compression_type", "none") + .put("batch_size", 16384) + .put("buffer_memory", "33554432") + .put("retries", 1) + .put("topic_num_partitions", 1) + .put("topic_replication_factor", 1) + 
.put("socket_connection_setup_timeout_ms", 3000) + .put("socket_connection_setup_timeout_max_ms", 3000) + .build()); + } + + @Override + protected TestDataComparator getTestDataComparator() { + return new AdvancedTestDataComparator(); + } + + @Override + protected boolean supportBasicDataTypeTest() { + return true; + } + + @Override + protected boolean supportArrayDataTypeTest() { + return true; + } + + @Override + protected boolean supportObjectDataTypeTest() { + return true; + } + + @Override + protected boolean implementsNamespaces() { + return true; + } + + @Override + protected List retrieveRecords(final TestDestinationEnv testEnv, + final String streamName, + final String namespace, + final JsonNode streamSchema) { + final List records = new ArrayList<>(); + final String bootstrapServers = redpandaContainer.getBootstrapServers(); + final String groupId = redpandaNameTransformer.getIdentifier(namespace + "-" + streamName); + try (final RedpandaConsumer redpandaConsumer = RedpandaConsumerFactory.getInstance(bootstrapServers, groupId)) { + final String topicName = redpandaNameTransformer.topicName(namespace, streamName); + redpandaConsumer.subscribe(Collections.singletonList(topicName)); + redpandaConsumer.poll(Duration.ofSeconds(5)).iterator() + .forEachRemaining(r -> records.add(r.value().get(JavaBaseConstants.COLUMN_NAME_DATA))); + } + return records; + } + +} diff --git a/airbyte-integrations/connectors/destination-redpanda/src/test-integration/java/io/airbyte/integrations/destination/redpanda/RedpandaDestinationTest.java b/airbyte-integrations/connectors/destination-redpanda/src/test-integration/java/io/airbyte/integrations/destination/redpanda/RedpandaDestinationTest.java new file mode 100644 index 0000000..925a332 --- /dev/null +++ b/airbyte-integrations/connectors/destination-redpanda/src/test-integration/java/io/airbyte/integrations/destination/redpanda/RedpandaDestinationTest.java @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.redpanda; + +import static org.assertj.core.api.Assertions.assertThat; + +import com.google.common.collect.ImmutableMap; +import io.airbyte.commons.json.Jsons; +import io.airbyte.protocol.models.v0.AirbyteConnectionStatus; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.testcontainers.redpanda.RedpandaContainer; + +class RedpandaDestinationTest { + + private RedpandaContainer redpandaContainer; + + private RedpandaDestination redpandaDestination; + + @BeforeEach + void setup() { + this.redpandaDestination = new RedpandaDestination(); + this.redpandaContainer = RedpandaContainerFactory.createRedpandaContainer(); + this.redpandaContainer.start(); + } + + @AfterEach + void shutdown() { + this.redpandaContainer.stop(); + this.redpandaContainer.close(); + } + + @Test + void testCheckWithSuccess() { + + var jsonConfig = Jsons.jsonNode(ImmutableMap.builder() + .put("bootstrap_servers", redpandaContainer.getBootstrapServers()) + .put("compression_type", "none") + .put("batch_size", 16384) + .put("buffer_memory", "33554432") + .put("retries", 1) + .put("topic_num_partitions", 1) + .put("topic_replication_factor", 1) + .put("socket_connection_setup_timeout_ms", 3000) + .put("socket_connection_setup_timeout_max_ms", 3000) + .build()); + + var status = redpandaDestination.check(jsonConfig); + + assertThat(status.getStatus()).isEqualTo(AirbyteConnectionStatus.Status.SUCCEEDED); + + } + + @Test + void testCheckWithFailure() { + + var jsonConfig = Jsons.jsonNode(ImmutableMap.builder() + .put("bootstrap_servers", "127.0.0.9") + .put("compression_type", "none") + .put("batch_size", 16384) + .put("buffer_memory", "33554432") + .put("retries", 1) + .put("topic_num_partitions", 1) + .put("topic_replication_factor", 1) + .put("socket_connection_setup_timeout_ms", 3000) + .put("socket_connection_setup_timeout_max_ms", 3000) + .build()); + + var status = redpandaDestination.check(jsonConfig); + + assertThat(status.getStatus()).isEqualTo(AirbyteConnectionStatus.Status.FAILED); + + } + +} diff --git a/airbyte-integrations/connectors/destination-redpanda/src/test-integration/java/io/airbyte/integrations/destination/redpanda/RedpandaOperationsTest.java b/airbyte-integrations/connectors/destination-redpanda/src/test-integration/java/io/airbyte/integrations/destination/redpanda/RedpandaOperationsTest.java new file mode 100644 index 0000000..efa071b --- /dev/null +++ b/airbyte-integrations/connectors/destination-redpanda/src/test-integration/java/io/airbyte/integrations/destination/redpanda/RedpandaOperationsTest.java @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
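+ *
+ * Integration tests for RedpandaOperations: topic creation, listing and deletion,
+ * plus asynchronous and blocking record publishing against a Testcontainers broker.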
+ */ + +package io.airbyte.integrations.destination.redpanda; + +import static org.assertj.core.api.Assertions.assertThat; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import io.airbyte.commons.json.Jsons; +import java.time.Duration; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.UUID; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.testcontainers.redpanda.RedpandaContainer; + +class RedpandaOperationsTest { + + private static final String TEST_TOPIC = "test_topic"; + + private RedpandaOperations redpandaOperations; + + private RedpandaConsumer redpandaConsumer; + + private RedpandaContainer redpandaContainer; + + @BeforeEach + void setup() { + this.redpandaContainer = RedpandaContainerFactory.createRedpandaContainer(); + this.redpandaContainer.start(); + var jsonConfig = Jsons.jsonNode(ImmutableMap.builder() + .put("bootstrap_servers", redpandaContainer.getBootstrapServers()) + .put("compression_type", "none") + .put("batch_size", 16384) + .put("buffer_memory", "33554432") + .put("retries", 1) + .put("topic_num_partitions", 1) + .put("topic_replication_factor", 1) + .put("socket_connection_setup_timeout_ms", 3000) + .put("socket_connection_setup_timeout_max_ms", 3000) + .put("", false) + .build()); + this.redpandaOperations = new RedpandaOperations(RedpandaConfig.createConfig(jsonConfig)); + this.redpandaConsumer = RedpandaConsumerFactory.getInstance(redpandaContainer.getBootstrapServers(), TEST_TOPIC); + } + + @AfterEach + void shutdown() { + this.redpandaOperations.close(); + this.redpandaConsumer.close(); + this.redpandaContainer.stop(); + this.redpandaContainer.close(); + } + + @Test + void testPutRecord() { + + redpandaOperations.putRecord(TEST_TOPIC, UUID.randomUUID().toString(), Jsons.jsonNode(Map.of("attr_1", "data1")), e -> {}); + redpandaOperations.putRecord(TEST_TOPIC, UUID.randomUUID().toString(), Jsons.jsonNode(Map.of("attr_1", "data2")), e -> {}); + redpandaOperations.flush(); + + List records = new ArrayList<>(); + redpandaConsumer.subscribe(Collections.singletonList(TEST_TOPIC)); + redpandaConsumer.poll(Duration.ofSeconds(5)).iterator().forEachRemaining(r -> records.add(r.value())); + + assertThat(records) + .hasSize(2); + } + + @Test + void testCreateTopic() { + + var topicInfo = new RedpandaOperations.TopicInfo(TEST_TOPIC, Optional.of(1), Optional.of((short) 1)); + redpandaOperations.createTopic(Set.of(topicInfo)); + + Set topics = redpandaOperations.listTopics(); + + assertThat(topics).anyMatch(topic -> topic.equals(TEST_TOPIC)); + } + + @Test + void testDeleteTopic() { + + // given + var topicInfo = new RedpandaOperations.TopicInfo(TEST_TOPIC, Optional.of(1), Optional.of((short) 1)); + redpandaOperations.createTopic(Set.of(topicInfo)); + + // when + redpandaOperations.deleteTopic(Set.of(TEST_TOPIC)); + + // then + Set topics = redpandaOperations.listTopics(); + + assertThat(topics).isEmpty(); + + } + + @Test + void testPutRecordBlocking() { + + redpandaOperations.putRecordBlocking(TEST_TOPIC, UUID.randomUUID().toString(), Jsons.jsonNode(Map.of("attr_1", "data1"))); + redpandaOperations.putRecordBlocking(TEST_TOPIC, UUID.randomUUID().toString(), Jsons.jsonNode(Map.of("attr_1", "data2"))); + redpandaOperations.flush(); + + List records = new ArrayList<>(); + 
redpandaConsumer.subscribe(Collections.singletonList(TEST_TOPIC)); + redpandaConsumer.poll(Duration.ofSeconds(5)).iterator().forEachRemaining(r -> records.add(r.value())); + + assertThat(records) + .hasSize(2); + + } + +} diff --git a/airbyte-integrations/connectors/destination-redpanda/src/test/java/io/airbyte/integrations/destination/redpanda/RedpandaConfigTest.java b/airbyte-integrations/connectors/destination-redpanda/src/test/java/io/airbyte/integrations/destination/redpanda/RedpandaConfigTest.java new file mode 100644 index 0000000..7fbef95 --- /dev/null +++ b/airbyte-integrations/connectors/destination-redpanda/src/test/java/io/airbyte/integrations/destination/redpanda/RedpandaConfigTest.java @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.redpanda; + +import static org.assertj.core.api.Assertions.assertThat; + +import io.airbyte.commons.json.Jsons; +import java.util.Comparator; +import java.util.Map; +import java.util.Optional; +import org.junit.jupiter.api.Test; + +class RedpandaConfigTest { + + @Test + void testRedpandaConfig() { + + var jsonConfig = Jsons.jsonNode(Map.of( + "bootstrap_servers", "host1:port1,host2:port2", + "buffer_memory", 33554432L, + "compression_type", "none", + "retries", 5, + "batch_size", 16384, + "topic_num_partitions", 1, + "topic_replication_factor", 1, + "socket_connection_setup_timeout_ms", 10000, + "socket_connection_setup_timeout_max_ms", 30000)); + + var redpandaConfig = RedpandaConfig.createConfig(jsonConfig); + + assertThat(redpandaConfig) + .usingComparatorForFields(new OptionalComparator(), "topicNumPartitions", "topicReplicationFactor") + .hasFieldOrPropertyWithValue("bootstrapServers", "host1:port1,host2:port2") + .hasFieldOrPropertyWithValue("bufferMemory", 33554432L) + .hasFieldOrPropertyWithValue("compressionType", "none") + .hasFieldOrPropertyWithValue("retries", 5) + .hasFieldOrPropertyWithValue("batchSize", 16384) + .hasFieldOrPropertyWithValue("topicNumPartitions", Optional.of(1)) + .hasFieldOrPropertyWithValue("topicReplicationFactor", Optional.of((short) 1)) + .hasFieldOrPropertyWithValue("socketConnectionSetupTimeoutMs", 10000) + .hasFieldOrPropertyWithValue("socketConnectionSetupTimeoutMaxMs", 30000); + + } + + private static class OptionalComparator implements Comparator> { + + @Override + public int compare(Optional o1, Optional o2) { + return Integer.compare(o1.get(), o2.get()); + } + + } + +} diff --git a/airbyte-integrations/connectors/destination-redpanda/src/test/java/io/airbyte/integrations/destination/redpanda/RedpandaNameTransformerTest.java b/airbyte-integrations/connectors/destination-redpanda/src/test/java/io/airbyte/integrations/destination/redpanda/RedpandaNameTransformerTest.java new file mode 100644 index 0000000..cefb906 --- /dev/null +++ b/airbyte-integrations/connectors/destination-redpanda/src/test/java/io/airbyte/integrations/destination/redpanda/RedpandaNameTransformerTest.java @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
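+ *
+ * Unit test for topic name generation from namespace and stream name.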
+ */ + +package io.airbyte.integrations.destination.redpanda; + +import static org.assertj.core.api.Assertions.assertThat; + +import org.junit.jupiter.api.Test; + +class RedpandaNameTransformerTest { + + @Test + void testTransformTopicName() { + + var redpandaNameTransformer = new RedpandaNameTransformer(); + + String topicName = redpandaNameTransformer.topicName("namespace", "stream"); + + assertThat(topicName).isEqualTo("namespace_stream"); + + } + +} diff --git a/airbyte-integrations/connectors/destination-redpanda/src/test/java/io/airbyte/integrations/destination/redpanda/RedpandaWriteConfigTest.java b/airbyte-integrations/connectors/destination-redpanda/src/test/java/io/airbyte/integrations/destination/redpanda/RedpandaWriteConfigTest.java new file mode 100644 index 0000000..f0a2487 --- /dev/null +++ b/airbyte-integrations/connectors/destination-redpanda/src/test/java/io/airbyte/integrations/destination/redpanda/RedpandaWriteConfigTest.java @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.redpanda; + +import static org.assertj.core.api.Assertions.assertThat; + +import io.airbyte.protocol.models.v0.DestinationSyncMode; +import org.junit.jupiter.api.Test; + +class RedpandaWriteConfigTest { + + @Test + void testRedpandaWriteConfig() { + + var writeConfig = new RedpandaWriteConfig("namespace_stream", DestinationSyncMode.OVERWRITE); + + assertThat(writeConfig) + .hasFieldOrPropertyWithValue("topicName", "namespace_stream") + .hasFieldOrPropertyWithValue("destinationSyncMode", DestinationSyncMode.OVERWRITE); + + } + +} diff --git a/airbyte-integrations/connectors/destination-rockset/BOOTSTRAP.md b/airbyte-integrations/connectors/destination-rockset/BOOTSTRAP.md new file mode 100644 index 0000000..8e426bd --- /dev/null +++ b/airbyte-integrations/connectors/destination-rockset/BOOTSTRAP.md @@ -0,0 +1,6 @@ +# Rockset Destination Connector Bootstrap + +[Rockset](https://rockset.com/) is a real-time analytics database for fast queries on fresh data from databases, lakes and streams. +An [API key](https://rockset.com/docs/rest-api/#authentication) is required. +Configured collections will be created as required and data will be written via the [write API](https://rockset.com/docs/rest-api/#adddocuments). + diff --git a/airbyte-integrations/connectors/destination-rockset/README.md b/airbyte-integrations/connectors/destination-rockset/README.md new file mode 100644 index 0000000..7eb71ac --- /dev/null +++ b/airbyte-integrations/connectors/destination-rockset/README.md @@ -0,0 +1,72 @@ +# Destination Rockset + +This is the repository for the Rockset destination connector in Java. +For information about how to use this connector within Airbyte, see [the User Documentation](https://docs.airbyte.io/integrations/destinations/rockset). + +## Local development + +#### Building via Gradle +From the Airbyte repository root, run: +``` +./gradlew :airbyte-integrations:connectors:destination-rockset:build +``` + +#### Create credentials +**If you are a community contributor**, generate the necessary credentials and place them in `secrets/config.json` conforming to the spec file in `src/main/resources/spec.json`. +Note that the `secrets` directory is git-ignored by default, so there is no danger of accidentally checking in sensitive information. 
+ +**If you are an Airbyte core member**, follow the [instructions](https://docs.airbyte.io/contributing-to-airbyte/building-new-connector#using-credentials-in-ci) to set up the credentials. + +### Locally running the connector docker image + +#### Build +Build the connector image via Gradle: + +``` +./gradlew :airbyte-integrations:connectors:destination-rockset:buildConnectorImage +``` +Once built, the docker image name and tag on your host will be `airbyte/destination-rockset:dev`. +the Dockerfile. + +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/destination-rockset:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-rockset:dev check --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-rockset:dev discover --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/destination-rockset:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json +``` + +## Testing +We use `JUnit` for Java tests. + +### Unit and Integration Tests +Place unit tests under `src/test/io/airbyte/integrations/destinations/rockset`. + +#### Acceptance Tests +Airbyte has a standard test suite that all destination connectors must pass. Implement the `TODO`s in +`src/test-integration/java/io/airbyte/integrations/destinations/rocksetDestinationAcceptanceTest.java`. + +### Using gradle to run tests +All commands should be run from airbyte project root. +To run unit tests: +``` +./gradlew :airbyte-integrations:connectors:destination-rockset:unitTest +``` +To run acceptance and custom integration tests: +``` +./gradlew :airbyte-integrations:connectors:destination-rockset:integrationTest +``` + +## Dependency Management + +### Publishing a new version of the connector +You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what? +1. Make sure your changes are passing our test suite: `airbyte-ci connectors --name=destination-rockset test` +2. Bump the connector version in `metadata.yaml`: increment the `dockerImageTag` value. Please follow [semantic versioning for connectors](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#semantic-versioning-for-connectors). +3. Make sure the `metadata.yaml` content is up to date. +4. Make the connector documentation and its changelog is up to date (`docs/integrations/destinations/rockset.md`). +5. Create a Pull Request: use [our PR naming conventions](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#pull-request-title-convention). +6. Pat yourself on the back for being an awesome contributor. +7. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master. 
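
### Development note: how a document reaches Rockset

For orientation while filling in the acceptance-test `TODO`s, below is a minimal, hedged sketch of writing a single document through the Rockset write API, built from the same `rockset-java` classes (`ApiClient`, `DocumentsApi`, `AddDocumentsRequest`) and the `RocksetUtils` helpers this connector ships. It is illustrative only: the production write path lives in `RocksetWriteApiConsumer`, and the API key, workspace name, and collection name are placeholders. It also assumes the class sits in the `io.airbyte.integrations.destination.rockset` package so the `RocksetUtils` helpers are in scope.

```java
package io.airbyte.integrations.destination.rockset;

import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableMap;
import com.rockset.client.ApiClient;
import com.rockset.client.api.DocumentsApi;
import com.rockset.client.model.AddDocumentsRequest;

// Hypothetical sketch; the API key, workspace and collection names are placeholders.
public class RocksetWriteSketch {

  public static void main(String[] args) throws Exception {
    final ObjectMapper mapper = new ObjectMapper();
    final ApiClient client = RocksetUtils.apiClient("YOUR_API_KEY", "https://api.rs2.usw2.rockset.com");

    // Collections are created on demand and must be READY before they accept writes.
    RocksetUtils.createWorkspaceIfNotExists(client, "commons");
    RocksetUtils.createCollectionIfNotExists(client, "commons", "my_collection");
    RocksetUtils.waitUntilCollectionReady(client, "commons", "my_collection");

    // Documents are plain JSON objects, sent in batches through the write API.
    final AddDocumentsRequest request = new AddDocumentsRequest();
    request.addDataItem(mapper.convertValue(ImmutableMap.of("id", 1, "name", "test"), new TypeReference<>() {}));
    new DocumentsApi(client).add("commons", "my_collection", request);
  }

}
```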
+ diff --git a/airbyte-integrations/connectors/destination-rockset/build.gradle b/airbyte-integrations/connectors/destination-rockset/build.gradle new file mode 100644 index 0000000..a5e64c0 --- /dev/null +++ b/airbyte-integrations/connectors/destination-rockset/build.gradle @@ -0,0 +1,30 @@ +plugins { + id 'application' + id 'airbyte-java-connector' +} + +airbyteJavaConnector { + cdkVersionRequired = '0.2.0' + features = ['db-destinations'] + useLocalCdk = false +} + +//remove once upgrading the CDK version to 0.4.x or later +java { + compileJava { + options.compilerArgs.remove("-Werror") + } +} + +airbyteJavaConnector.addCdkDependencies() + +application { + mainClass = 'io.airbyte.integrations.destination.rockset.RocksetDestination' + applicationDefaultJvmArgs = ['-XX:+ExitOnOutOfMemoryError', '-XX:MaxRAMPercentage=75.0'] +} + +dependencies { + + implementation group: 'com.rockset', name: 'rockset-java', version: '0.9.0' + implementation group: 'org.awaitility', name: 'awaitility', version: '4.1.1' +} diff --git a/airbyte-integrations/connectors/destination-rockset/metadata.yaml b/airbyte-integrations/connectors/destination-rockset/metadata.yaml new file mode 100644 index 0000000..281376e --- /dev/null +++ b/airbyte-integrations/connectors/destination-rockset/metadata.yaml @@ -0,0 +1,23 @@ +data: + connectorSubtype: database + connectorType: destination + definitionId: 2c9d93a7-9a17-4789-9de9-f46f0097eb70 + dockerImageTag: 0.1.4 + dockerRepository: airbyte/destination-rockset + githubIssueLabel: destination-rockset + license: MIT + name: Rockset + registries: + cloud: + enabled: false + oss: + enabled: false + releaseStage: alpha + documentationUrl: https://docs.airbyte.com/integrations/destinations/rockset + tags: + - language:java + ab_internal: + sl: 100 + ql: 100 + supportLevel: archived +metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/destination-rockset/src/main/java/io/airbyte/integrations/destination/rockset/RocksetDestination.java b/airbyte-integrations/connectors/destination-rockset/src/main/java/io/airbyte/integrations/destination/rockset/RocksetDestination.java new file mode 100644 index 0000000..2c4b25c --- /dev/null +++ b/airbyte-integrations/connectors/destination-rockset/src/main/java/io/airbyte/integrations/destination/rockset/RocksetDestination.java @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.rockset; + +import static io.airbyte.integrations.destination.rockset.RocksetUtils.ROCKSET_WORKSPACE_ID; + +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.ImmutableMap; +import com.rockset.client.ApiClient; +import com.rockset.client.api.DocumentsApi; +import com.rockset.client.model.AddDocumentsRequest; +import io.airbyte.cdk.integrations.BaseConnector; +import io.airbyte.cdk.integrations.base.AirbyteMessageConsumer; +import io.airbyte.cdk.integrations.base.Destination; +import io.airbyte.cdk.integrations.base.IntegrationRunner; +import io.airbyte.protocol.models.v0.AirbyteConnectionStatus; +import io.airbyte.protocol.models.v0.AirbyteConnectionStatus.Status; +import io.airbyte.protocol.models.v0.AirbyteMessage; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import java.util.Map; +import java.util.UUID; +import java.util.function.Consumer; +import org.apache.commons.lang3.RandomStringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class RocksetDestination extends BaseConnector implements Destination { + + private static final Logger LOGGER = LoggerFactory.getLogger(RocksetDestination.class); + private static final ObjectMapper mapper = new ObjectMapper(); + + public static void main(String[] args) throws Exception { + new IntegrationRunner(new RocksetDestination()).run(args); + } + + @Override + public AirbyteConnectionStatus check(final JsonNode config) { + ApiClient client = null; + String workspace = null; + // Create a temporary table + final String cname = "tmp_test_airbyte_collection_" + RandomStringUtils.randomAlphabetic(7).toLowerCase(); + + try { + client = RocksetUtils.apiClientFromConfig(config); + + workspace = config.get(ROCKSET_WORKSPACE_ID).asText(); + RocksetUtils.createWorkspaceIfNotExists(client, workspace); + + RocksetUtils.createCollectionIfNotExists(client, workspace, cname); + RocksetUtils.waitUntilCollectionReady(client, workspace, cname); + + // Write a single document + final String unique = UUID.randomUUID().toString(); + final Map dummyRecord = ImmutableMap.of("_id", unique); + final AddDocumentsRequest req = new AddDocumentsRequest(); + req.addDataItem(mapper.convertValue(dummyRecord, new TypeReference<>() {})); + new DocumentsApi(client).add(workspace, cname, req); + + // Verify that the doc shows up + final String sql = String.format("SELECT * FROM %s.%s WHERE _id = '%s';", workspace, cname, unique); + RocksetUtils.waitUntilDocCount(client, sql, 1); + + LOGGER.info("Check succeeded"); + return new AirbyteConnectionStatus().withStatus(Status.SUCCEEDED); + } catch (Exception e) { + LOGGER.info("Check failed.", e); + return new AirbyteConnectionStatus().withStatus(Status.FAILED).withMessage(e.getMessage() != null ? 
e.getMessage() : e.toString()); + } finally { + // Delete the collection + if (client != null && workspace != null) { + RocksetUtils.deleteCollectionIfExists(client, workspace, cname); + } + + } + } + + @Override + public AirbyteMessageConsumer getConsumer( + JsonNode config, + ConfiguredAirbyteCatalog catalog, + Consumer outputRecordCollector) + throws Exception { + return new RocksetWriteApiConsumer(config, catalog, outputRecordCollector); + } + +} diff --git a/airbyte-integrations/connectors/destination-rockset/src/main/java/io/airbyte/integrations/destination/rockset/RocksetSQLNameTransformer.java b/airbyte-integrations/connectors/destination-rockset/src/main/java/io/airbyte/integrations/destination/rockset/RocksetSQLNameTransformer.java new file mode 100644 index 0000000..8faaef9 --- /dev/null +++ b/airbyte-integrations/connectors/destination-rockset/src/main/java/io/airbyte/integrations/destination/rockset/RocksetSQLNameTransformer.java @@ -0,0 +1,21 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.rockset; + +import io.airbyte.cdk.integrations.destination.StandardNameTransformer; + +public class RocksetSQLNameTransformer extends StandardNameTransformer { + + @Override + public String convertStreamName(String input) { + return super.convertStreamName(input).toLowerCase(); + } + + @Override + public String applyDefaultCase(String input) { + return input.toLowerCase(); + } + +} diff --git a/airbyte-integrations/connectors/destination-rockset/src/main/java/io/airbyte/integrations/destination/rockset/RocksetUtils.java b/airbyte-integrations/connectors/destination-rockset/src/main/java/io/airbyte/integrations/destination/rockset/RocksetUtils.java new file mode 100644 index 0000000..775564c --- /dev/null +++ b/airbyte-integrations/connectors/destination-rockset/src/main/java/io/airbyte/integrations/destination/rockset/RocksetUtils.java @@ -0,0 +1,264 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.rockset; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.hash.Hasher; +import com.google.common.hash.Hashing; +import com.google.gson.internal.LinkedTreeMap; +import com.rockset.client.ApiClient; +import com.rockset.client.ApiException; +import com.rockset.client.api.CollectionsApi; +import com.rockset.client.api.DocumentsApi; +import com.rockset.client.api.QueriesApi; +import com.rockset.client.api.WorkspacesApi; +import com.rockset.client.model.Collection; +import com.rockset.client.model.CreateCollectionRequest; +import com.rockset.client.model.CreateWorkspaceRequest; +import com.rockset.client.model.DeleteDocumentsRequest; +import com.rockset.client.model.DeleteDocumentsRequestData; +import com.rockset.client.model.ErrorModel; +import com.rockset.client.model.GetCollectionResponse; +import com.rockset.client.model.ListCollectionsResponse; +import com.rockset.client.model.QueryRequest; +import com.rockset.client.model.QueryRequestSql; +import com.rockset.client.model.QueryResponse; +import io.airbyte.commons.lang.Exceptions; +import java.nio.charset.Charset; +import java.util.List; +import java.util.Optional; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; +import org.awaitility.Awaitility; +import org.awaitility.Duration; +import org.awaitility.core.ConditionFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class RocksetUtils { + + private static final Logger LOGGER = LoggerFactory.getLogger(RocksetUtils.class); + + public static final String ROCKSET_WORKSPACE_ID = "workspace"; + public static final String API_KEY_ID = "api_key"; + public static final String API_SERVER_ID = "api_server"; + public static final Duration DEFAULT_TIMEOUT = new Duration(20, TimeUnit.MINUTES); + public static final Duration DEFAULT_POLL_INTERVAL = Duration.FIVE_SECONDS; + private static final java.time.Duration DEFAULT_HTTP_CLIENT_TIMEOUT = java.time.Duration.ofMinutes(1L); + private static final String DEFAULT_ROCKSET_CLIENT_VERSION = "0.9.0"; + + public static ApiClient apiClientFromConfig(JsonNode config) { + final String apiKey = config.get(API_KEY_ID).asText(); + final String apiServer = config.get(API_SERVER_ID).asText(); + return apiClient(apiKey, apiServer); + } + + public static ApiClient apiClient(String apiKey, String apiServer) { + final ApiClient client = new ApiClient(); + + client.setReadTimeout((int) DEFAULT_HTTP_CLIENT_TIMEOUT.toMillis()) + .setConnectTimeout((int) DEFAULT_HTTP_CLIENT_TIMEOUT.toMillis()) + .setWriteTimeout((int) DEFAULT_HTTP_CLIENT_TIMEOUT.toMillis()); + + client.setApiKey(apiKey); + client.setApiServer(apiServer); + client.setVersion(DEFAULT_ROCKSET_CLIENT_VERSION); + return client; + } + + public static void createWorkspaceIfNotExists(ApiClient client, String workspace) { + final CreateWorkspaceRequest request = new CreateWorkspaceRequest().name(workspace); + + try { + new WorkspacesApi(client).create(request); + LOGGER.info(String.format("Created workspace %s", workspace)); + } catch (ApiException e) { + if (e.getCode() == 400 && e.getErrorModel().getType() == ErrorModel.TypeEnum.ALREADYEXISTS) { + LOGGER.info(String.format("Workspace %s already exists", workspace)); + return; + } + + throw new RuntimeException(e); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + // Assumes the workspace exists + public static void createCollectionIfNotExists(ApiClient client, String workspace, String cname) { + final 
CreateCollectionRequest request = new CreateCollectionRequest().name(cname); + try { + new CollectionsApi(client).create(workspace, request); + LOGGER.info(String.format("Created collection %s.%s", workspace, cname)); + } catch (ApiException e) { + if (e.getCode() == 400 && e.getErrorModel().getType() == ErrorModel.TypeEnum.ALREADYEXISTS) { + LOGGER.info(String.format("Collection %s.%s already exists", workspace, cname)); + return; + } + throw new RuntimeException(e); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + // Assumes the collection exists + public static void deleteCollectionIfExists(ApiClient client, String workspace, String cname) { + try { + new CollectionsApi(client).delete(workspace, cname); + LOGGER.info(String.format("Deleted collection %s.%s", workspace, cname)); + } catch (ApiException e) { + if (e.getCode() == 404 && e.getErrorModel().getType() == ErrorModel.TypeEnum.NOTFOUND) { + LOGGER.info(String.format("Collection %s.%s does not exist", workspace, cname)); + return; + } + + throw new RuntimeException(e); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + // Assumes the collection exists + public static void waitUntilCollectionReady(ApiClient client, String workspace, String cname) { + pollingConfig(workspace, cname) + .until(() -> isCollectionReady(client, workspace, cname)); + + } + + private static boolean isCollectionReady(ApiClient client, String workspace, String cname) throws Exception { + final GetCollectionResponse resp = new CollectionsApi(client).get(workspace, cname); + final Collection.StatusEnum status = resp.getData().getStatus(); + if (status == Collection.StatusEnum.READY) { + LOGGER.info(String.format("Collection %s.%s is READY", workspace, cname)); + return true; + } else { + LOGGER.info( + String.format( + "Waiting until %s.%s is READY, it is %s", workspace, cname, status.toString())); + return false; + } + } + + // Assumes the collection exists + public static void waitUntilCollectionDeleted(ApiClient client, String workspace, String cname) { + pollingConfig(workspace, cname) + .until(() -> isCollectionDeleted(client, workspace, cname)); + + } + + private static boolean isCollectionDeleted(ApiClient client, String workspace, String cname) throws Exception { + try { + new CollectionsApi(client).get(workspace, cname); + LOGGER.info( + String.format( + "Collection %s.%s still exists, waiting for deletion to complete", + workspace, cname)); + } catch (ApiException e) { + if (e.getCode() == 404 && e.getErrorModel().getType() == ErrorModel.TypeEnum.NOTFOUND) { + LOGGER.info(String.format("Collection %s.%s does not exist", workspace, cname)); + return true; + } + + throw e; + } + return false; + } + + // Assumes the collection exists + public static void waitUntilDocCount(ApiClient client, String sql, int desiredCount) { + pollingConfig(sql) + .until(() -> queryMatchesCount(client, sql, desiredCount)); + } + + private static boolean queryMatchesCount(ApiClient client, String sql, int desiredCount) throws Exception { + LOGGER.info(String.format("Running query %s", sql)); + final QueryRequestSql qrs = new QueryRequestSql(); + qrs.setQuery(sql); + + final QueryRequest qr = new QueryRequest(); + qr.setSql(qrs); + + final QueryResponse response = new QueriesApi(client).query(qr); + final int resultCount = response.getResults().size(); + + if (resultCount == desiredCount) { + LOGGER.info(String.format("Desired result count %s found", desiredCount)); + return true; + } else { + LOGGER.info( + String.format( + "Waiting 
for desired result count %s, current is %s", desiredCount, resultCount)); + return false; + } + } + + private static boolean doesCollectionExist(ApiClient client, String workspace, String cname) throws Exception { + final ListCollectionsResponse collectionsResponse = new CollectionsApi(client).workspace(workspace); + return collectionsResponse + .getData() + .stream() + .anyMatch(coll -> coll.getName().equals(cname)); + } + + public static void clearCollectionIfCollectionExists(ApiClient client, String workspace, String cname) { + Exceptions.toRuntime(() -> { + + if (!doesCollectionExist(client, workspace, cname)) { + return; + } + + final QueryRequest qr = new QueryRequest().sql(new QueryRequestSql().query(String.format("SELECT _id from %s.%s", workspace, cname))); + try { + final QueryResponse resp = new QueriesApi(client).query(qr); + final List ids = + resp.getResults().stream().map(f -> (LinkedTreeMap) f).map(f -> (String) f.get("_id")).collect(Collectors.toList()); + final DeleteDocumentsRequest ddr = new DeleteDocumentsRequest(); + for (String id : ids) { + ddr.addDataItem(new DeleteDocumentsRequestData().id(id)); + } + LOGGER.info("Deleting documents from " + cname); + new DocumentsApi(client).delete(workspace, cname, ddr); + } catch (Exception e) { + LOGGER.error("Error while trying to clear a collection ", e); + } + + pollingConfig(workspace, cname) + .until(() -> isCollectionEmpty(client, workspace, cname)); + + }); + } + + private static boolean isCollectionEmpty(ApiClient client, String workspace, String cname) { + return Exceptions.toRuntime(() -> { + final String elementCount = String.format("SELECT count(*) as numel from %s.%s", workspace, cname); + + final QueryRequest qr = new QueryRequest().sql(new QueryRequestSql().query(elementCount)); + final QueryResponse resp = new QueriesApi(client).query(qr); + Optional count = + resp.getResults().stream().map(f -> (LinkedTreeMap) f).map(f -> f.get("numel")).map(f -> (Number) f).findFirst(); + return count.filter(number -> number.intValue() == 0).isPresent(); + + }); + + } + + private static Duration jitter(String... args) { + final Hasher hsh = Hashing.murmur3_32().newHasher(); + for (String s : args) { + hsh.putString(s, Charset.defaultCharset()); + } + + return new Duration(Math.abs(hsh.hash().asInt()) % DEFAULT_POLL_INTERVAL.getValueInMS(), TimeUnit.MILLISECONDS); + + } + + private static ConditionFactory pollingConfig(final String... args) { + return Awaitility.await() + .timeout(DEFAULT_TIMEOUT) + .pollDelay(jitter(args)) + .pollInterval(DEFAULT_POLL_INTERVAL); + } + +} diff --git a/airbyte-integrations/connectors/destination-rockset/src/main/java/io/airbyte/integrations/destination/rockset/RocksetWriteApiConsumer.java b/airbyte-integrations/connectors/destination-rockset/src/main/java/io/airbyte/integrations/destination/rockset/RocksetWriteApiConsumer.java new file mode 100644 index 0000000..1dd6687 --- /dev/null +++ b/airbyte-integrations/connectors/destination-rockset/src/main/java/io/airbyte/integrations/destination/rockset/RocksetWriteApiConsumer.java @@ -0,0 +1,194 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.rockset; + +import static io.airbyte.integrations.destination.rockset.RocksetUtils.API_KEY_ID; +import static io.airbyte.integrations.destination.rockset.RocksetUtils.API_SERVER_ID; +import static io.airbyte.integrations.destination.rockset.RocksetUtils.ROCKSET_WORKSPACE_ID; + +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.rockset.client.ApiClient; +import com.rockset.client.api.DocumentsApi; +import com.rockset.client.model.AddDocumentsRequest; +import com.rockset.client.model.AddDocumentsResponse; +import com.rockset.client.model.DocumentStatus; +import io.airbyte.cdk.integrations.base.FailureTrackingAirbyteMessageConsumer; +import io.airbyte.commons.lang.Exceptions; +import io.airbyte.protocol.models.v0.AirbyteMessage; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.v0.DestinationSyncMode; +import java.time.Instant; +import java.time.temporal.ChronoUnit; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; +import java.util.function.Consumer; +import java.util.stream.Collectors; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class RocksetWriteApiConsumer extends FailureTrackingAirbyteMessageConsumer { + + private static final Logger LOGGER = LoggerFactory.getLogger(RocksetWriteApiConsumer.class); + private static final ObjectMapper mapper = new ObjectMapper(); + // IO bound tasks, use cached thread pool + private final ExecutorService exec = Executors.newFixedThreadPool(5); + + private final ScheduledExecutorService schedExec = Executors.newSingleThreadScheduledExecutor(); + + private final String apiKey; + private final String apiServer; + private final String workspace; + + private final ConfiguredAirbyteCatalog catalog; + private final Consumer outputRecordCollector; + + // records to be sent per collection + private final Map> records; + private final RocksetSQLNameTransformer nameTransformer = new RocksetSQLNameTransformer(); + private long lastSentDocumentMicroSeconds = 0L; + private ApiClient client; + + public RocksetWriteApiConsumer( + JsonNode config, + ConfiguredAirbyteCatalog catalog, + Consumer outputRecordCollector) { + this.apiKey = config.get(API_KEY_ID).asText(); + this.apiServer = config.get(API_SERVER_ID).asText(); + this.workspace = config.get(ROCKSET_WORKSPACE_ID).asText(); + this.records = new HashMap<>(); + + this.catalog = catalog; + this.outputRecordCollector = outputRecordCollector; + } + + @Override + protected void startTracked() throws Exception { + this.client = RocksetUtils.apiClient(apiKey, apiServer); + LOGGER.info("Creating workspace"); + RocksetUtils.createWorkspaceIfNotExists(client, workspace); + + CompletableFuture[] overwrittenStreams = catalog.getStreams() + .stream() + .filter(s -> s.getDestinationSyncMode() == DestinationSyncMode.OVERWRITE) + .map(s -> s.getStream().getName()) + .map(nameTransformer::convertStreamName) + .map(this::emptyCollection) + .collect(Collectors.toList()) + .toArray(CompletableFuture[]::new); + + CompletableFuture[] appendStreams = catalog.getStreams().stream() + .filter(s -> s.getDestinationSyncMode() == 
DestinationSyncMode.APPEND) + .map(s -> s.getStream().getName()) + .map(nameTransformer::convertStreamName) + .map(this::createCollectionIntoReadyState) + .collect(Collectors.toList()) + .toArray(CompletableFuture[]::new); + + CompletableFuture initStreams = CompletableFuture.allOf( + CompletableFuture.allOf(overwrittenStreams), + CompletableFuture.allOf(appendStreams)); + + // Creating and readying many collections at once can be slow + initStreams.get(30, TimeUnit.MINUTES); + + // Schedule sending of records at a fixed rate + schedExec.scheduleAtFixedRate(this::sendBatches, 0L, 5L, TimeUnit.SECONDS); + } + + @Override + protected void acceptTracked(AirbyteMessage message) throws Exception { + if (message.getType() == AirbyteMessage.Type.RECORD) { + String cname = nameTransformer.convertStreamName(message.getRecord().getStream()); + + Map obj = mapper.convertValue(message.getRecord().getData(), new TypeReference<>() {}); + long current = ChronoUnit.MICROS.between(Instant.EPOCH, Instant.now()); + + // ensure a monotonic timestamp on records at microsecond precision. + while (current <= lastSentDocumentMicroSeconds) { + current = ChronoUnit.MICROS.between(Instant.EPOCH, Instant.now()); + } + lastSentDocumentMicroSeconds = current; + + // microsecond precision + // See https://rockset.com/docs/special-fields/#the-_event_time-field + obj.put("_event_time", current); + addRequestToBatch(obj, cname); + } else if (message.getType() == AirbyteMessage.Type.STATE) { + this.outputRecordCollector.accept(message); + } + } + + @Override + protected void close(boolean hasFailed) throws Exception { + // Nothing to do + LOGGER.info("Shutting down!"); + LOGGER.info("Sending final batch of records if any remain!"); + sendBatches(); + LOGGER.info("Final batch of records sent!"); + LOGGER.info("Shutting down executors"); + this.schedExec.shutdown(); + exec.shutdown(); + LOGGER.info("Executors shut down"); + } + + private void addRequestToBatch(Object document, String cname) { + synchronized (this.records) { + List collectionRecords = this.records.getOrDefault(cname, new ArrayList<>()); + collectionRecords.add(document); + this.records.put(cname, collectionRecords); + } + } + + private void sendBatches() { + List> requests; + synchronized (this.records) { + requests = this.records.entrySet().stream().filter(e -> e.getValue().size() > 0) + .map((e) -> { + AddDocumentsRequest adr = new AddDocumentsRequest(); + e.getValue().forEach(adr::addDataItem); + return Map.entry(e.getKey(), adr); + } + + ).collect(Collectors.toList()); + this.records.clear(); + } + List responses; + responses = requests.stream().map((e) -> Exceptions.toRuntime(() -> new DocumentsApi(client).add(workspace, e.getKey(), e.getValue()))) + .collect(Collectors.toList()); + + responses + .stream() + .flatMap(d -> d.getData().stream()) + .collect(Collectors.groupingBy(DocumentStatus::getStatus)) + .entrySet() + .stream() + .forEach((e) -> LOGGER.info("{} documents added with a status of {}", e.getValue().size(), e.getKey())); + } + + private CompletableFuture emptyCollection(String cname) { + return CompletableFuture.runAsync(() -> { + RocksetUtils.clearCollectionIfCollectionExists(client, workspace, cname); + RocksetUtils.createCollectionIfNotExists(client, workspace, cname); + RocksetUtils.waitUntilCollectionReady(client, workspace, cname); + }, exec); + } + + private CompletableFuture createCollectionIntoReadyState(String cname) { + return CompletableFuture.runAsync(() -> { + RocksetUtils.createCollectionIfNotExists(client, workspace, cname); + 
RocksetUtils.waitUntilCollectionReady(client, workspace, cname); + }, exec); + } + +} diff --git a/airbyte-integrations/connectors/destination-rockset/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-rockset/src/main/resources/spec.json new file mode 100644 index 0000000..1bf5a3a --- /dev/null +++ b/airbyte-integrations/connectors/destination-rockset/src/main/resources/spec.json @@ -0,0 +1,39 @@ +{ + "documentationUrl": "https://docs.airbyte.com/integrations/destinations/rockset", + "supportsIncremental": true, + "supported_destination_sync_modes": ["append", "overwrite"], + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Rockset Destination Spec", + "type": "object", + "required": ["api_key", "workspace"], + "additionalProperties": false, + "properties": { + "api_key": { + "title": "Api Key", + "description": "Rockset api key", + "type": "string", + "order": 0, + "airbyte_secret": true + }, + "workspace": { + "title": "Workspace", + "description": "The Rockset workspace in which collections will be created + written to.", + "type": "string", + "examples": ["commons", "my_workspace"], + "default": "commons", + "airbyte_secret": false, + "order": 1 + }, + "api_server": { + "title": "Api Server", + "description": "Rockset api URL", + "type": "string", + "airbyte_secret": false, + "default": "https://api.rs2.usw2.rockset.com", + "pattern": "^https:\\/\\/.*.rockset.com$", + "order": 2 + } + } + } +} diff --git a/airbyte-integrations/connectors/destination-rockset/src/test-integration/java/io/airbyte/integrations/destination/rockset/RocksetDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-rockset/src/test-integration/java/io/airbyte/integrations/destination/rockset/RocksetDestinationAcceptanceTest.java new file mode 100644 index 0000000..81c294e --- /dev/null +++ b/airbyte-integrations/connectors/destination-rockset/src/test-integration/java/io/airbyte/integrations/destination/rockset/RocksetDestinationAcceptanceTest.java @@ -0,0 +1,196 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.rockset; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; +import com.rockset.client.ApiClient; +import com.rockset.client.api.QueriesApi; +import com.rockset.client.model.QueryRequest; +import com.rockset.client.model.QueryRequestSql; +import com.squareup.okhttp.Response; +import io.airbyte.cdk.integrations.standardtest.destination.DestinationAcceptanceTest; +import io.airbyte.cdk.integrations.standardtest.destination.comparator.AdvancedTestDataComparator; +import io.airbyte.cdk.integrations.standardtest.destination.comparator.TestDataComparator; +import io.airbyte.commons.io.IOs; +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.lang.Exceptions; +import java.io.IOException; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.stream.Collectors; +import org.junit.jupiter.api.AfterAll; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.testng.collections.Sets; + +public class RocksetDestinationAcceptanceTest extends DestinationAcceptanceTest { + + private static final ObjectMapper mapper = new ObjectMapper(); + private static final Set collectionsToClear = Sets.newHashSet(); + private static final Set collectionsToDelete = Sets.newHashSet(); + private static final ExecutorService tearDownExec = Executors.newCachedThreadPool(); + private static final RocksetSQLNameTransformer nameTransformer = new RocksetSQLNameTransformer(); + + private static final Logger LOGGER = + LoggerFactory.getLogger(RocksetDestinationAcceptanceTest.class); + + @Override + protected String getImageName() { + return "airbyte/destination-rockset:dev"; + } + + @Override + protected JsonNode getConfig() throws IOException { + return Jsons.deserialize(IOs.readFile(Path.of("secrets/config.json"))); + } + + @Override + protected TestDataComparator getTestDataComparator() { + return new AdvancedTestDataComparator(); + } + + @Override + protected boolean supportBasicDataTypeTest() { + return true; + } + + @Override + protected boolean supportArrayDataTypeTest() { + return true; + } + + @Override + protected boolean supportObjectDataTypeTest() { + return true; + } + + @Override + protected JsonNode getFailCheckConfig() throws Exception { + return Jsons.jsonNode( + ImmutableMap.builder() + .put("workspace", "commons") + .put("api_key", "nope nope nope") + .build()); + } + + @Override + protected List retrieveRecords( + TestDestinationEnv testEnv, + String stream, + String namespace, + JsonNode streamSchema) + throws Exception { + + final String ws = getConfig().get("workspace").asText(); + final ApiClient client = RocksetUtils.apiClientFromConfig(getConfig()); + final String streamName = nameTransformer.convertStreamName(stream); + LOGGER.info("Retrieving records for " + streamName); + + RocksetUtils.createWorkspaceIfNotExists(client, ws); + RocksetUtils.createCollectionIfNotExists(client, ws, streamName); + RocksetUtils.waitUntilCollectionReady(client, ws, streamName); + collectionsToClear.add(streamName); + collectionsToDelete.add(streamName); + + // ORDER BY _event_time because the test 
suite expects to retrieve messages in the order they + // were + // originally written + final String sqlText = String.format("SELECT * FROM %s.%s ORDER BY _event_time;", ws, streamName); + + final QueryRequest query = new QueryRequest().sql(new QueryRequestSql().query(sqlText)); + + final QueriesApi queryClient = new QueriesApi(RocksetUtils.apiClientFromConfig(getConfig())); + + LOGGER.info("About to wait for indexing on " + streamName); + try { + // As Rockset is not a transactional database, we have to wait a few seconds to be extra sure + // that we've given documents enough time to be fully indexed when retrieving records + Thread.sleep(20_000); + } catch (InterruptedException e) { + e.printStackTrace(); + } + List results = new ArrayList<>(); + int previousResultSize; + // By heuristic once the document level stabilizes, the ingestion is probably done + do { + previousResultSize = results.size(); + Thread.sleep(10_000); + final Response response = queryClient.queryCall(query, null, null).execute(); + final JsonNode json = mapper.readTree(response.body().string()); + results = Lists.newArrayList(json.get("results").iterator()); + LOGGER.info("Waiting on stable doc counts, prev= " + previousResultSize + " currrent=" + results.size()); + } while (results.size() != previousResultSize); + + return results.stream() + .peek(RocksetDestinationAcceptanceTest::dropRocksetAddedFields) + .collect(Collectors.toList()); + } + + private static void dropRocksetAddedFields(JsonNode n) { + dropFields(n, "_id", "_event_time"); + } + + private static void dropFields(JsonNode node, String... fields) { + Arrays.stream(fields).forEach(((ObjectNode) node)::remove); + } + + @Override + protected void setup(TestDestinationEnv testEnv, HashSet TEST_SCHEMAS) { + // Nothing to do + } + + @Override + protected void tearDown(TestDestinationEnv testEnv) { + try { + final ApiClient client = RocksetUtils.apiClientFromConfig(getConfig()); + String workspace = getConfig().get("workspace").asText(); + collectionsToClear.stream() + .map( + cn -> CompletableFuture.runAsync(() -> { + RocksetUtils.clearCollectionIfCollectionExists(client, workspace, cn); + }, tearDownExec)) + // collect to avoid laziness of stream + .collect(Collectors.toList()) + .forEach(CompletableFuture::join); + collectionsToClear.clear(); + } catch (IOException e) { + e.printStackTrace(); + } + } + + @AfterAll + public static void exitSuite() throws Exception { + LOGGER.info("Deleting all collections used during testing "); + final JsonNode config = Jsons.deserialize(IOs.readFile(Path.of("secrets/config.json"))); + final ApiClient client = RocksetUtils.apiClientFromConfig(config); + final String workspace = config.get("workspace").asText(); + collectionsToDelete.stream().map(cn -> deleteCollection(client, workspace, cn)).collect(Collectors.toList()).forEach(CompletableFuture::join); + tearDownExec.shutdown(); + + } + + private static CompletableFuture deleteCollection(ApiClient client, String workspace, String cn) { + return CompletableFuture.runAsync( + () -> Exceptions.toRuntime( + () -> { + RocksetUtils.deleteCollectionIfExists(client, workspace, cn); + RocksetUtils.waitUntilCollectionDeleted(client, workspace, cn); + Thread.sleep(2500); // Let services pick up deletion in case of re-creation + }), + tearDownExec); + } + +} diff --git a/airbyte-integrations/connectors/destination-rockset/src/test/java/io/airbyte/integrations/destination/rockset/RocksetWriteApiConsumerTest.java 
b/airbyte-integrations/connectors/destination-rockset/src/test/java/io/airbyte/integrations/destination/rockset/RocksetWriteApiConsumerTest.java new file mode 100644 index 0000000..6146f73 --- /dev/null +++ b/airbyte-integrations/connectors/destination-rockset/src/test/java/io/airbyte/integrations/destination/rockset/RocksetWriteApiConsumerTest.java @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.rockset; + +import static io.airbyte.integrations.destination.rockset.RocksetUtils.API_KEY_ID; +import static io.airbyte.integrations.destination.rockset.RocksetUtils.API_SERVER_ID; +import static io.airbyte.integrations.destination.rockset.RocksetUtils.ROCKSET_WORKSPACE_ID; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import io.airbyte.cdk.integrations.base.FailureTrackingAirbyteMessageConsumer; +import io.airbyte.cdk.integrations.standardtest.destination.PerStreamStateMessageTest; +import io.airbyte.commons.json.Jsons; +import io.airbyte.protocol.models.v0.AirbyteMessage; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import java.util.function.Consumer; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; + +@ExtendWith(MockitoExtension.class) +public class RocksetWriteApiConsumerTest extends PerStreamStateMessageTest { + + @Mock + private Consumer outputRecordCollector; + + @Mock + private ConfiguredAirbyteCatalog catalog; + + private RocksetWriteApiConsumer consumer; + + @BeforeEach + public void init() { + consumer = new RocksetWriteApiConsumer(getTestConfig(), catalog, outputRecordCollector); + } + + @Override + protected Consumer getMockedConsumer() { + return outputRecordCollector; + } + + @Override + protected FailureTrackingAirbyteMessageConsumer getMessageConsumer() { + return consumer; + } + + private JsonNode getTestConfig() { + return Jsons.jsonNode( + ImmutableMap.builder() + .put(API_KEY_ID, "testApiKey") + .put(API_SERVER_ID, "testApiServerId") + .put(ROCKSET_WORKSPACE_ID, "testRocksetWorkspaceId") + .build()); + } + +} diff --git a/airbyte-integrations/connectors/destination-scaffold-destination-python/README.md b/airbyte-integrations/connectors/destination-scaffold-destination-python/README.md new file mode 100644 index 0000000..f22d294 --- /dev/null +++ b/airbyte-integrations/connectors/destination-scaffold-destination-python/README.md @@ -0,0 +1,159 @@ +# Scaffold Destination Python Destination + +This is the repository for the Scaffold Destination Python destination connector, written in Python. +For information about how to use this connector within Airbyte, see [the documentation](https://docs.airbyte.com/integrations/destinations/scaffold-destination-python). + +## Local development + +### Prerequisites +**To iterate on this connector, make sure to complete this prerequisites section.** + +#### Minimum Python version required `= 3.9.0` + +#### Activate Virtual Environment and install dependencies +From this connector directory, create a virtual environment: +``` +python -m venv .venv +``` + +This will generate a virtualenv for this module in `.venv/`. Make sure this venv is active in your +development environment of choice. 
To activate it from the terminal, run: +``` +source .venv/bin/activate +pip install -r requirements.txt +``` +If you are in an IDE, follow your IDE's instructions to activate the virtualenv. + +Note that while we are installing dependencies from `requirements.txt`, you should only edit `setup.py` for your dependencies. `requirements.txt` is +used for editable installs (`pip install -e`) to pull in Python dependencies from the monorepo and will call `setup.py`. +If this is mumbo jumbo to you, don't worry about it, just put your deps in `setup.py` but install using `pip install -r requirements.txt` and everything +should work as you expect. + + +#### Create credentials +**If you are a community contributor**, follow the instructions in the [documentation](https://docs.airbyte.com/integrations/destinations/scaffold-destination-python) +to generate the necessary credentials. Then create a file `secrets/config.json` conforming to the `destination_scaffold_destination_python/spec.json` file. +Note that the `secrets` directory is gitignored by default, so there is no danger of accidentally checking in sensitive information. +See `integration_tests/sample_config.json` for a sample config file. + +**If you are an Airbyte core member**, copy the credentials in Lastpass under the secret name `destination scaffold-destination-python test creds` +and place them into `secrets/config.json`. + +### Locally running the connector +``` +python main.py spec +python main.py check --config secrets/config.json +python main.py discover --config secrets/config.json +python main.py read --config secrets/config.json --catalog integration_tests/configured_catalog.json +``` + +### Locally running the connector docker image + +#### Use `airbyte-ci` to build your connector +The Airbyte way of building this connector is to use our `airbyte-ci` tool. +You can follow install instructions [here](https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/pipelines/README.md#L1). +Then running the following command will build your connector: + +```bash +airbyte-ci connectors --name destination-scaffold-destination-python build +``` +Once the command is done, you will find your connector image in your local docker registry: `airbyte/destination-scaffold-destination-python:dev`. + +##### Customizing our build process +When contributing on our connector you might need to customize the build process to add a system dependency or set an env var. +You can customize our build process by adding a `build_customization.py` module to your connector. +This module should contain a `pre_connector_install` and `post_connector_install` async function that will mutate the base image and the connector container respectively. +It will be imported at runtime by our build process and the functions will be called if they exist. + +Here is an example of a `build_customization.py` module: +```python +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + # Feel free to check the dagger documentation for more information on the Container object and its methods. 
+ # https://dagger-io.readthedocs.io/en/sdk-python-v0.6.4/ + from dagger import Container + + +async def pre_connector_install(base_image_container: Container) -> Container: + return await base_image_container.with_env_variable("MY_PRE_BUILD_ENV_VAR", "my_pre_build_env_var_value") + +async def post_connector_install(connector_container: Container) -> Container: + return await connector_container.with_env_variable("MY_POST_BUILD_ENV_VAR", "my_post_build_env_var_value") +``` + +#### Build your own connector image +This connector is built using our dynamic built process in `airbyte-ci`. +The base image used to build it is defined within the metadata.yaml file under the `connectorBuildOptions`. +The build logic is defined using [Dagger](https://dagger.io/) [here](https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/pipelines/pipelines/builds/python_connectors.py). +It does not rely on a Dockerfile. + +If you would like to patch our connector and build your own a simple approach would be to: + +1. Create your own Dockerfile based on the latest version of the connector image. +```Dockerfile +FROM airbyte/destination-scaffold-destination-python:latest + +COPY . ./airbyte/integration_code +RUN pip install ./airbyte/integration_code + +# The entrypoint and default env vars are already set in the base image +# ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" +# ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] +``` +Please use this as an example. This is not optimized. + +2. Build your image: +```bash +docker build -t airbyte/destination-scaffold-destination-python:dev . +# Running the spec command against your patched connector +docker run airbyte/destination-scaffold-destination-python:dev spec +```` +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/destination-scaffold-destination-python:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-scaffold-destination-python:dev check --config /secrets/config.json +# messages.jsonl is a file containing line-separated JSON representing AirbyteMessages +cat messages.jsonl | docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/destination-scaffold-destination-python:dev write --config /secrets/config.json --catalog /integration_tests/configured_catalog.json +``` +## Testing + Make sure to familiarize yourself with [pytest test discovery](https://docs.pytest.org/en/latest/goodpractices.html#test-discovery) to know how your test files and methods should be named. +First install test dependencies into your virtual environment: +``` +pip install .[tests] +``` +### Unit Tests +To run unit tests locally, from the connector directory run: +``` +python -m pytest unit_tests +``` + +### Integration Tests +There are two types of integration tests: Acceptance Tests (Airbyte's test suite for all destination connectors) and custom integration tests (which are specific to this connector). +#### Custom Integration tests +Place custom tests inside `integration_tests/` folder, then, from the connector root, run +``` +python -m pytest integration_tests +``` +#### Acceptance Tests +Coming soon: + +### Using `airbyte-ci` to run tests +See [airbyte-ci documentation](https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/pipelines/README.md#connectors-test-command) + +## Dependency Management +All of your dependencies should go in `setup.py`, NOT `requirements.txt`. 
The requirements file is only used to connect internal Airbyte dependencies in the monorepo for local development. +We split dependencies between two groups, dependencies that are: +* required for your connector to work need to go to `MAIN_REQUIREMENTS` list. +* required for the testing need to go to `TEST_REQUIREMENTS` list + +### Publishing a new version of the connector +You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what? +1. Make sure your changes are passing unit and integration tests. +1. Bump the connector version in `Dockerfile` -- just increment the value of the `LABEL io.airbyte.version` appropriately (we use [SemVer](https://semver.org/)). +1. Create a Pull Request. +1. Pat yourself on the back for being an awesome contributor. +1. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master. diff --git a/airbyte-integrations/connectors/destination-scaffold-destination-python/destination_scaffold_destination_python/__init__.py b/airbyte-integrations/connectors/destination-scaffold-destination-python/destination_scaffold_destination_python/__init__.py new file mode 100644 index 0000000..c1075c9 --- /dev/null +++ b/airbyte-integrations/connectors/destination-scaffold-destination-python/destination_scaffold_destination_python/__init__.py @@ -0,0 +1,8 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +from .destination import DestinationScaffoldDestinationPython + +__all__ = ["DestinationScaffoldDestinationPython"] diff --git a/airbyte-integrations/connectors/destination-scaffold-destination-python/destination_scaffold_destination_python/destination.py b/airbyte-integrations/connectors/destination-scaffold-destination-python/destination_scaffold_destination_python/destination.py new file mode 100644 index 0000000..55575d6 --- /dev/null +++ b/airbyte-integrations/connectors/destination-scaffold-destination-python/destination_scaffold_destination_python/destination.py @@ -0,0 +1,53 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +from typing import Any, Iterable, Mapping + +from airbyte_cdk import AirbyteLogger +from airbyte_cdk.destinations import Destination +from airbyte_cdk.models import AirbyteConnectionStatus, AirbyteMessage, ConfiguredAirbyteCatalog, Status + + +class DestinationScaffoldDestinationPython(Destination): + def write( + self, config: Mapping[str, Any], configured_catalog: ConfiguredAirbyteCatalog, input_messages: Iterable[AirbyteMessage] + ) -> Iterable[AirbyteMessage]: + + """ + TODO + Reads the input stream of messages, config, and catalog to write data to the destination. + + This method returns an iterable (typically a generator of AirbyteMessages via yield) containing state messages received + in the input message stream. Outputting a state message means that every AirbyteRecordMessage which came before it has been + successfully persisted to the destination. This is used to ensure fault tolerance in the case that a sync fails before fully completing, + then the source is given the last state message output from this method as the starting point of the next sync. 
+ + :param config: dict of JSON configuration matching the configuration declared in spec.json + :param configured_catalog: The Configured Catalog describing the schema of the data being received and how it should be persisted in the + destination + :param input_messages: The stream of input messages received from the source + :return: Iterable of AirbyteStateMessages wrapped in AirbyteMessage structs + """ + + pass + + def check(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> AirbyteConnectionStatus: + """ + Tests if the input configuration can be used to successfully connect to the destination with the needed permissions + e.g: if a provided API token or password can be used to connect and write to the destination. + + :param logger: Logging object to display debug/info/error to the logs + (logs will not be accessible via airbyte UI if they are not passed to this logger) + :param config: Json object containing the configuration of this destination, content of this json is as specified in + the properties of the spec.json file + + :return: AirbyteConnectionStatus indicating a Success or Failure + """ + try: + # TODO + + return AirbyteConnectionStatus(status=Status.SUCCEEDED) + except Exception as e: + return AirbyteConnectionStatus(status=Status.FAILED, message=f"An exception occurred: {repr(e)}") diff --git a/airbyte-integrations/connectors/destination-scaffold-destination-python/destination_scaffold_destination_python/spec.json b/airbyte-integrations/connectors/destination-scaffold-destination-python/destination_scaffold_destination_python/spec.json new file mode 100644 index 0000000..7b75cfc --- /dev/null +++ b/airbyte-integrations/connectors/destination-scaffold-destination-python/destination_scaffold_destination_python/spec.json @@ -0,0 +1,20 @@ +{ + "documentationUrl": "https://docs.airbyte.com/integrations/destinations/scaffold-destination-python", + "supported_destination_sync_modes": [ + "TODO, available options are: 'overwrite', 'append', and 'append_dedup'" + ], + "supportsIncremental": true, + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Destination Scaffold Destination Python", + "type": "object", + "required": ["TODO -- fix me!"], + "additionalProperties": false, + "properties": { + "TODO": { + "type": "string", + "description": "FIX ME" + } + } + } +} diff --git a/airbyte-integrations/connectors/destination-scaffold-destination-python/integration_tests/integration_test.py b/airbyte-integrations/connectors/destination-scaffold-destination-python/integration_tests/integration_test.py new file mode 100644 index 0000000..d945ab6 --- /dev/null +++ b/airbyte-integrations/connectors/destination-scaffold-destination-python/integration_tests/integration_test.py @@ -0,0 +1,8 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +def integration_test(): + # TODO write integration tests + pass diff --git a/airbyte-integrations/connectors/destination-scaffold-destination-python/main.py b/airbyte-integrations/connectors/destination-scaffold-destination-python/main.py new file mode 100644 index 0000000..facd8fd --- /dev/null +++ b/airbyte-integrations/connectors/destination-scaffold-destination-python/main.py @@ -0,0 +1,11 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+# + + +import sys + +from destination_scaffold_destination_python import DestinationScaffoldDestinationPython + +if __name__ == "__main__": + DestinationScaffoldDestinationPython().run(sys.argv[1:]) diff --git a/airbyte-integrations/connectors/destination-scaffold-destination-python/metadata.yaml b/airbyte-integrations/connectors/destination-scaffold-destination-python/metadata.yaml new file mode 100644 index 0000000..6665273 --- /dev/null +++ b/airbyte-integrations/connectors/destination-scaffold-destination-python/metadata.yaml @@ -0,0 +1,30 @@ +data: + allowedHosts: + hosts: + - TODO # Please change to the hostname of the source. + registries: + oss: + enabled: false + cloud: + enabled: false + connectorBuildOptions: + # Please update to the latest version of the connector base image. + # Please use the full address with sha256 hash to guarantee build reproducibility. + # https://hub.docker.com/r/airbyte/python-connector-base + baseImage: docker.io/airbyte/python-connector-base:1.0.0@sha256:dd17e347fbda94f7c3abff539be298a65af2d7fc27a307d89297df1081a45c27 + connectorSubtype: database + connectorType: destination + definitionId: 1c342214-aad1-4344-8ee8-92c8c7e91c07 + dockerImageTag: 0.1.0 + dockerRepository: airbyte/destination-scaffold-destination-python + githubIssueLabel: destination-scaffold-destination-python + icon: scaffold-destination-python.svg + license: MIT + name: Scaffold Destination Python + releaseDate: TODO + releaseStage: alpha + supportLevel: archived + documentationUrl: https://docs.airbyte.com/integrations/destinations/scaffold-destination-python + tags: + - language:python +metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/destination-scaffold-destination-python/requirements.txt b/airbyte-integrations/connectors/destination-scaffold-destination-python/requirements.txt new file mode 100644 index 0000000..d6e1198 --- /dev/null +++ b/airbyte-integrations/connectors/destination-scaffold-destination-python/requirements.txt @@ -0,0 +1 @@ +-e . diff --git a/airbyte-integrations/connectors/destination-scaffold-destination-python/setup.py b/airbyte-integrations/connectors/destination-scaffold-destination-python/setup.py new file mode 100644 index 0000000..18cf103 --- /dev/null +++ b/airbyte-integrations/connectors/destination-scaffold-destination-python/setup.py @@ -0,0 +1,25 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +from setuptools import find_packages, setup + +MAIN_REQUIREMENTS = [ + "airbyte-cdk", +] + +TEST_REQUIREMENTS = ["pytest~=6.2"] + +setup( + name="destination_scaffold_destination_python", + description="Destination implementation for Scaffold Destination Python.", + author="Airbyte", + author_email="contact@airbyte.io", + packages=find_packages(), + install_requires=MAIN_REQUIREMENTS, + package_data={"": ["*.json"]}, + extras_require={ + "tests": TEST_REQUIREMENTS, + }, +) diff --git a/airbyte-integrations/connectors/destination-scaffold-destination-python/unit_tests/unit_test.py b/airbyte-integrations/connectors/destination-scaffold-destination-python/unit_tests/unit_test.py new file mode 100644 index 0000000..219ae01 --- /dev/null +++ b/airbyte-integrations/connectors/destination-scaffold-destination-python/unit_tests/unit_test.py @@ -0,0 +1,7 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+# + + +def test_example_method(): + assert True diff --git a/airbyte-integrations/connectors/destination-scylla/README.md b/airbyte-integrations/connectors/destination-scylla/README.md new file mode 100644 index 0000000..6fc6d93 --- /dev/null +++ b/airbyte-integrations/connectors/destination-scylla/README.md @@ -0,0 +1,72 @@ +# Destination Scylla + +This is the repository for the Scylla destination connector in Java. +For information about how to use this connector within Airbyte, see [the User Documentation](https://docs.airbyte.io/integrations/destinations/scylla). + +## Local development + +#### Building via Gradle +From the Airbyte repository root, run: +``` +./gradlew :airbyte-integrations:connectors:destination-scylla:build +``` + +#### Create credentials +**If you are a community contributor**, generate the necessary credentials and place them in `secrets/config.json` conforming to the spec file in `src/main/resources/spec.json`. +Note that the `secrets` directory is git-ignored by default, so there is no danger of accidentally checking in sensitive information. + +**If you are an Airbyte core member**, follow the [instructions](https://docs.airbyte.io/connector-development#using-credentials-in-ci) to set up the credentials. + +### Locally running the connector docker image + +#### Build +Build the connector image via Gradle: + +``` +./gradlew :airbyte-integrations:connectors:destination-scylla:buildConnectorImage +``` +Once built, the docker image name and tag on your host will be `airbyte/destination-scylla:dev`. +the Dockerfile. + +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/destination-scylla:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-scylla:dev check --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-scylla:dev discover --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/destination-scylla:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json +``` + +## Testing +We use `JUnit` for Java tests. + +### Unit and Integration Tests +Place unit tests under `src/test/io/airbyte/integrations/destinations/scylla`. + +#### Acceptance Tests +Airbyte has a standard test suite that all destination connectors must pass. Implement the `TODO`s in +`src/test-integration/java/io/airbyte/integrations/destinations/scyllaDestinationAcceptanceTest.java`. + +### Using gradle to run tests +All commands should be run from airbyte project root. +To run unit tests: +``` +./gradlew :airbyte-integrations:connectors:destination-scylla:unitTest +``` +To run acceptance and custom integration tests: +``` +./gradlew :airbyte-integrations:connectors:destination-scylla:integrationTest +``` + +## Dependency Management + +### Publishing a new version of the connector +You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what? +1. Make sure your changes are passing our test suite: `airbyte-ci connectors --name=destination-scylla test` +2. Bump the connector version in `metadata.yaml`: increment the `dockerImageTag` value. Please follow [semantic versioning for connectors](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#semantic-versioning-for-connectors). +3. Make sure the `metadata.yaml` content is up to date. +4. 
Make sure the connector documentation and its changelog are up to date (`docs/integrations/destinations/scylla.md`). +5. Create a Pull Request: use [our PR naming conventions](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#pull-request-title-convention). +6. Pat yourself on the back for being an awesome contributor. +7. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master. + diff --git a/airbyte-integrations/connectors/destination-scylla/bootstrap.md b/airbyte-integrations/connectors/destination-scylla/bootstrap.md new file mode 100644 index 0000000..3a2e33f --- /dev/null +++ b/airbyte-integrations/connectors/destination-scylla/bootstrap.md @@ -0,0 +1,32 @@ +# Scylla Destination + +Scylla is an open-source distributed NoSQL wide-column data store designed to handle large amounts of data across many +commodity servers, providing high availability with no single point of failure. It is designed to be compatible with +Apache Cassandra while achieving significantly higher throughputs and lower latencies. It supports the same protocols as +Cassandra (CQL and Thrift) and the same file formats (SSTable). + +The data is structured in keyspaces and tables and is partitioned and replicated across different nodes in the +cluster. +[Read more about Scylla](https://www.scylladb.com/) + +This connector maps an incoming `stream` to a Scylla `table` and a `namespace` to a Scylla `keyspace`. +When using the `append` or `append_dedup` destination sync mode, an `insert` operation is performed against an existing +Scylla table. +When using `overwrite`, the records are first placed in a temp table. When all the messages have been received, the data +is copied to the final table, which is first truncated, and the temp table is then deleted. + +The implementation uses the [Scylla Java driver](https://github.com/scylladb/java-driver/) to access +Scylla. [ScyllaCqlProvider](./src/main/java/io/airbyte/integrations/destination/scylla/ScyllaCqlProvider.java) +handles the communication with the Scylla cluster; internally it uses +the [ScyllaSessionPool](./src/main/java/io/airbyte/integrations/destination/scylla/ScyllaSessionPool.java) to retrieve a +session to the cluster. + +The [ScyllaMessageConsumer](./src/main/java/io/airbyte/integrations/destination/scylla/ScyllaMessageConsumer.java) +class contains the logic for handling Airbyte messages and events and for copying data between tables. + +## Development + +See the [ScyllaCqlProvider](./src/main/java/io/airbyte/integrations/destination/scylla/ScyllaCqlProvider.java) +class for an example of how to use the Scylla driver.
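+
+For orientation, here is a minimal, self-contained sketch (not part of the connector) of the driver calls that `ScyllaCqlProvider` builds on. The contact point, port, credentials and the `demo_keyspace`/`demo_table` names are placeholder assumptions; the column names mirror the raw columns the connector creates.
+
+```java
+import com.datastax.driver.core.Cluster;
+import com.datastax.driver.core.Session;
+
+public class ScyllaQuickstart {
+
+  public static void main(String[] args) {
+    // placeholder connection settings; align them with your secrets/config.json
+    try (Cluster cluster = Cluster.builder()
+        .addContactPoint("127.0.0.1")
+        .withPort(9042)
+        .withCredentials("usr", "pw")
+        .build()) {
+      Session session = cluster.connect();
+      // same replication strategy the connector uses in createKeyspaceIfNotExists
+      session.execute("CREATE KEYSPACE IF NOT EXISTS demo_keyspace WITH replication = "
+          + "{'class': 'SimpleStrategy', 'replication_factor': 1}");
+      // raw-table layout: uuid id, JSON payload as text, emitted_at timestamp
+      session.execute("CREATE TABLE IF NOT EXISTS demo_keyspace.demo_table ("
+          + "\"_airbyte_ab_id\" uuid PRIMARY KEY, \"_airbyte_data\" text, \"_airbyte_emitted_at\" timestamp)");
+      session.execute("INSERT INTO demo_keyspace.demo_table "
+          + "(\"_airbyte_ab_id\", \"_airbyte_data\", \"_airbyte_emitted_at\") "
+          + "VALUES (uuid(), '{}', toTimestamp(now()))");
+    }
+  }
+}
+```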
+ +[Scylla driver docs.](https://docs.scylladb.com/using-scylla/drivers/cql-drivers/scylla-java-driver/) \ No newline at end of file diff --git a/airbyte-integrations/connectors/destination-scylla/build.gradle b/airbyte-integrations/connectors/destination-scylla/build.gradle new file mode 100644 index 0000000..512279a --- /dev/null +++ b/airbyte-integrations/connectors/destination-scylla/build.gradle @@ -0,0 +1,37 @@ +plugins { + id 'application' + id 'airbyte-java-connector' +} + +airbyteJavaConnector { + cdkVersionRequired = '0.2.0' + features = ['db-destinations'] + useLocalCdk = false +} + +//remove once upgrading the CDK version to 0.4.x or later +java { + compileJava { + options.compilerArgs.remove("-Werror") + } +} + +airbyteJavaConnector.addCdkDependencies() + +application { + mainClass = 'io.airbyte.integrations.destination.scylla.ScyllaDestination' + applicationDefaultJvmArgs = ['-XX:+ExitOnOutOfMemoryError', '-XX:MaxRAMPercentage=75.0'] +} + +def scyllaDriver = '3.10.2-scylla-1' +def assertVersion = '3.21.0' + +dependencies { + + implementation "com.scylladb:scylla-driver-core:${scyllaDriver}" + + // https://mvnrepository.com/artifact/org.assertj/assertj-core + testImplementation "org.assertj:assertj-core:${assertVersion}" + // https://mvnrepository.com/artifact/org.testcontainers/testcontainers + testImplementation libs.testcontainers.scylla +} diff --git a/airbyte-integrations/connectors/destination-scylla/docker-compose.yml b/airbyte-integrations/connectors/destination-scylla/docker-compose.yml new file mode 100644 index 0000000..af0ecac --- /dev/null +++ b/airbyte-integrations/connectors/destination-scylla/docker-compose.yml @@ -0,0 +1,19 @@ +version: "3" + +services: + scylla1: + image: scylladb/scylla + ports: + - "9042:9042" + container_name: scylla1 + command: --smp 1 +# uncomment if you want to run a cluster of scylladb nodes +# scylla2: +# image: scylladb/scylla +# container_name: scylla2 +# command: --seeds=scylla1 +# +# scylla3: +# image: scylladb/scylla +# container_name: scylla3 +# command: --seeds=scylla1 diff --git a/airbyte-integrations/connectors/destination-scylla/icon.svg b/airbyte-integrations/connectors/destination-scylla/icon.svg new file mode 100644 index 0000000..8a096f8 --- /dev/null +++ b/airbyte-integrations/connectors/destination-scylla/icon.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/airbyte-integrations/connectors/destination-scylla/metadata.yaml b/airbyte-integrations/connectors/destination-scylla/metadata.yaml new file mode 100644 index 0000000..fb5797b --- /dev/null +++ b/airbyte-integrations/connectors/destination-scylla/metadata.yaml @@ -0,0 +1,24 @@ +data: + connectorSubtype: database + connectorType: destination + definitionId: 3dc6f384-cd6b-4be3-ad16-a41450899bf0 + dockerImageTag: 0.1.3 + dockerRepository: airbyte/destination-scylla + githubIssueLabel: destination-scylla + icon: scylla.svg + license: MIT + name: Scylla + registries: + cloud: + enabled: false + oss: + enabled: false + releaseStage: alpha + documentationUrl: https://docs.airbyte.com/integrations/destinations/scylla + tags: + - language:java + ab_internal: + sl: 100 + ql: 100 + supportLevel: archived +metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/destination-scylla/src/main/java/io/airbyte/integrations/destination/scylla/ScyllaConfig.java b/airbyte-integrations/connectors/destination-scylla/src/main/java/io/airbyte/integrations/destination/scylla/ScyllaConfig.java new file mode 100644 index 0000000..8bc995f --- /dev/null +++ 
b/airbyte-integrations/connectors/destination-scylla/src/main/java/io/airbyte/integrations/destination/scylla/ScyllaConfig.java @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.scylla; + +import com.fasterxml.jackson.databind.JsonNode; +import java.util.Objects; + +public class ScyllaConfig { + + private final String keyspace; + + private final String username; + + private final String password; + + private final String address; + + private final int port; + + private final int replication; + + public ScyllaConfig(String keyspace, String username, String password, String address, int port, int replication) { + this.keyspace = keyspace; + this.username = username; + this.password = password; + this.address = address; + this.port = port; + this.replication = replication; + } + + public ScyllaConfig(JsonNode jsonNode) { + this.keyspace = jsonNode.get("keyspace").asText(); + this.username = jsonNode.get("username").asText(); + this.password = jsonNode.get("password").asText(); + this.address = jsonNode.get("address").asText(); + this.port = jsonNode.get("port").asInt(); + this.replication = jsonNode.get("replication").asInt(1); + } + + public String getKeyspace() { + return keyspace; + } + + public String getUsername() { + return username; + } + + public String getPassword() { + return password; + } + + public String getAddress() { + return address; + } + + public int getPort() { + return port; + } + + public int getReplication() { + return replication; + } + + @Override + public String toString() { + return "ScyllaConfig{" + + "keyspace='" + keyspace + '\'' + + ", username='" + username + '\'' + + ", password='" + password + '\'' + + ", address='" + address + '\'' + + ", port=" + port + + ", replication=" + replication + + '}'; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + ScyllaConfig that = (ScyllaConfig) o; + return port == that.port && username.equals(that.username) && password.equals(that.password) && + address.equals(that.address); + } + + @Override + public int hashCode() { + return Objects.hash(username, password, address, port); + } + +} diff --git a/airbyte-integrations/connectors/destination-scylla/src/main/java/io/airbyte/integrations/destination/scylla/ScyllaCqlProvider.java b/airbyte-integrations/connectors/destination-scylla/src/main/java/io/airbyte/integrations/destination/scylla/ScyllaCqlProvider.java new file mode 100644 index 0000000..d296bcc --- /dev/null +++ b/airbyte-integrations/connectors/destination-scylla/src/main/java/io/airbyte/integrations/destination/scylla/ScyllaCqlProvider.java @@ -0,0 +1,177 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.scylla; + +import com.datastax.driver.core.AbstractTableMetadata; +import com.datastax.driver.core.BatchStatement; +import com.datastax.driver.core.BoundStatement; +import com.datastax.driver.core.Cluster; +import com.datastax.driver.core.DataType; +import com.datastax.driver.core.PreparedStatement; +import com.datastax.driver.core.Session; +import com.datastax.driver.core.querybuilder.QueryBuilder; +import com.datastax.driver.core.schemabuilder.SchemaBuilder; +import com.datastax.driver.core.utils.UUIDs; +import io.airbyte.cdk.integrations.base.JavaBaseConstants; +import java.io.Closeable; +import java.time.Instant; +import java.util.Date; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.stream.Collectors; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class ScyllaCqlProvider implements Closeable { + + private static final Logger LOGGER = LoggerFactory.getLogger(ScyllaCqlProvider.class); + + private static final int N_THREADS = Runtime.getRuntime().availableProcessors(); + + private final ScyllaConfig scyllaConfig; + + private final Cluster cluster; + + private final Session session; + + private final ExecutorService executorService; + + private final String columnId; + + private final String columnData; + + private final String columnTimestamp; + + public ScyllaCqlProvider(ScyllaConfig scyllaConfig) { + this.scyllaConfig = scyllaConfig; + var sessionTuple = ScyllaSessionPool.initSession(scyllaConfig); + this.cluster = sessionTuple.value1(); + this.session = sessionTuple.value2(); + this.executorService = Executors.newFixedThreadPool(N_THREADS); + var nameTransformer = new ScyllaNameTransformer(scyllaConfig); + this.columnId = nameTransformer.outputColumn(JavaBaseConstants.COLUMN_NAME_AB_ID); + this.columnData = nameTransformer.outputColumn(JavaBaseConstants.COLUMN_NAME_DATA); + this.columnTimestamp = nameTransformer.outputColumn(JavaBaseConstants.COLUMN_NAME_EMITTED_AT); + } + + public void createKeyspaceIfNotExists(String keyspace) { + var createKeyspace = SchemaBuilder.createKeyspace(keyspace) + .ifNotExists() + .with() + .replication(Map.of( + "class", "SimpleStrategy", + "replication_factor", scyllaConfig.getReplication())) + .durableWrites(true); + session.execute(createKeyspace); + } + + public void createTableIfNotExists(String keyspace, String table) { + var createTable = SchemaBuilder.createTable(keyspace, table) + .ifNotExists() + .addPartitionKey(columnId, DataType.uuid()) + .addColumn(columnData, DataType.text()) + .addColumn(columnTimestamp, DataType.timestamp()); + session.execute(createTable); + } + + public void dropTableIfExists(String keyspace, String table) { + var drop = SchemaBuilder.dropTable(keyspace, table).ifExists(); + session.execute(drop); + } + + public void truncate(String keyspace, String table) { + var truncate = QueryBuilder.truncate(keyspace, table); + session.execute(truncate); + } + + public void insert(String keyspace, String table, String data) { + var insert = QueryBuilder.insertInto(keyspace, table) + .value(columnId, UUIDs.random()) + .value(columnData, data) + .value(columnTimestamp, Instant.now().toEpochMilli()); + session.execute(insert); + } + + public List> select(String keyspace, String table) { + var select = QueryBuilder.select().all().from(keyspace, table); 
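+    // run the select and map each row to an (_airbyte_ab_id, _airbyte_data, _airbyte_emitted_at) triplet,
+    // converting the emitted_at timestamp into an Instant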
+ return session.execute(select).all().stream() + .map(r -> Triplet.of( + r.get(columnId, UUID.class), + r.get(columnData, String.class), + r.get(columnTimestamp, Date.class).toInstant())) + .collect(Collectors.toList()); + } + + public List>> metadata() { + return cluster.getMetadata().getKeyspaces().stream() + .map(keyspace -> Tuple.of(keyspace.getName(), keyspace.getTables().stream() + .map(AbstractTableMetadata::getName) + .collect(Collectors.toList()))) + .collect(Collectors.toList()); + } + + public void copy(String keyspace, String sourceTable, String destinationTable) { + + var select = String.format("SELECT * FROM %s.%s WHERE token(%s) > ? AND token(%s) <= ?", + keyspace, sourceTable, columnId, columnId); + + var selectStatement = session.prepare(select); + + var insert = String.format("INSERT INTO %s.%s (%s, %s, %s) VALUES (?, ?, ?)", + keyspace, destinationTable, columnId, columnData, columnTimestamp); + + var insertStatement = session.prepare(insert); + // insertStatement.setConsistencyLevel(ConsistencyLevel.ONE); + + // perform full table scan in parallel using token ranges + // optimal for copying large amounts of data + cluster.getMetadata().getTokenRanges().stream() + .flatMap(range -> range.unwrap().stream()) + .map(range -> selectStatement.bind(range.getStart(), range.getEnd())) + .map(selectBoundStatement -> executorService.submit(() -> batchInsert(selectBoundStatement, insertStatement))) + .forEach(this::awaitThread); + + } + + private void batchInsert(BoundStatement select, PreparedStatement insert) { + // unlogged removes the log record for increased insert speed + var batchStatement = new BatchStatement(BatchStatement.Type.UNLOGGED); + + session.execute(select).all().stream() + .map(r -> Triplet.of( + r.get(columnId, UUID.class), + r.get(columnData, String.class), + r.get(columnTimestamp, Date.class))) + .map(t -> insert.bind(t.value1(), t.value2(), t.value3())) + .forEach(batchStatement::add); + + session.execute(batchStatement); + } + + private void awaitThread(Future future) { + try { + future.get(); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + LOGGER.error("Interrupted thread while copying data: ", e); + } catch (ExecutionException e) { + LOGGER.error("Error while copying data: ", e); + } + } + + @Override + public void close() { + // gracefully shutdown executor service + executorService.shutdown(); + // close scylla session + ScyllaSessionPool.closeSession(scyllaConfig); + } + +} diff --git a/airbyte-integrations/connectors/destination-scylla/src/main/java/io/airbyte/integrations/destination/scylla/ScyllaDestination.java b/airbyte-integrations/connectors/destination-scylla/src/main/java/io/airbyte/integrations/destination/scylla/ScyllaDestination.java new file mode 100644 index 0000000..1a60831 --- /dev/null +++ b/airbyte-integrations/connectors/destination-scylla/src/main/java/io/airbyte/integrations/destination/scylla/ScyllaDestination.java @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.scylla; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.cdk.integrations.BaseConnector; +import io.airbyte.cdk.integrations.base.AirbyteMessageConsumer; +import io.airbyte.cdk.integrations.base.Destination; +import io.airbyte.cdk.integrations.base.IntegrationRunner; +import io.airbyte.protocol.models.v0.AirbyteConnectionStatus; +import io.airbyte.protocol.models.v0.AirbyteMessage; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import java.util.UUID; +import java.util.function.Consumer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class ScyllaDestination extends BaseConnector implements Destination { + + private static final Logger LOGGER = LoggerFactory.getLogger(ScyllaDestination.class); + + public static void main(String[] args) throws Exception { + new IntegrationRunner(new ScyllaDestination()).run(args); + } + + @Override + public AirbyteConnectionStatus check(JsonNode config) { + var scyllaConfig = new ScyllaConfig(config); + // add random uuid to avoid conflicts with existing tables. + String tableName = "table_" + UUID.randomUUID().toString().replace("-", ""); + ScyllaCqlProvider scyllaCqlProvider = null; + try { + scyllaCqlProvider = new ScyllaCqlProvider(scyllaConfig); + // check connection and write permissions + scyllaCqlProvider.createKeyspaceIfNotExists(scyllaConfig.getKeyspace()); + scyllaCqlProvider.createTableIfNotExists(scyllaConfig.getKeyspace(), tableName); + scyllaCqlProvider.insert(scyllaConfig.getKeyspace(), tableName, "{}"); + return new AirbyteConnectionStatus().withStatus(AirbyteConnectionStatus.Status.SUCCEEDED); + } catch (Exception e) { + LOGGER.error("Can't establish Scylla connection with reason: ", e); + return new AirbyteConnectionStatus().withStatus(AirbyteConnectionStatus.Status.FAILED); + } finally { + if (scyllaCqlProvider != null) { + try { + scyllaCqlProvider.dropTableIfExists(scyllaConfig.getKeyspace(), tableName); + } catch (Exception e) { + LOGGER.error("Error while deleting temp table {} with reason: ", tableName, e); + } + scyllaCqlProvider.close(); + } + } + } + + @Override + public AirbyteMessageConsumer getConsumer(JsonNode config, + ConfiguredAirbyteCatalog configuredCatalog, + Consumer outputRecordCollector) { + return new ScyllaMessageConsumer(new ScyllaConfig(config), configuredCatalog, outputRecordCollector); + } + +} diff --git a/airbyte-integrations/connectors/destination-scylla/src/main/java/io/airbyte/integrations/destination/scylla/ScyllaMessageConsumer.java b/airbyte-integrations/connectors/destination-scylla/src/main/java/io/airbyte/integrations/destination/scylla/ScyllaMessageConsumer.java new file mode 100644 index 0000000..2c9edfb --- /dev/null +++ b/airbyte-integrations/connectors/destination-scylla/src/main/java/io/airbyte/integrations/destination/scylla/ScyllaMessageConsumer.java @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.scylla; + +import io.airbyte.cdk.integrations.base.FailureTrackingAirbyteMessageConsumer; +import io.airbyte.commons.json.Jsons; +import io.airbyte.protocol.models.v0.AirbyteMessage; +import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import java.util.Map; +import java.util.function.Consumer; +import java.util.stream.Collectors; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class ScyllaMessageConsumer extends FailureTrackingAirbyteMessageConsumer { + + private static final Logger LOGGER = LoggerFactory.getLogger(ScyllaMessageConsumer.class); + + private final ScyllaConfig scyllaConfig; + + private final Consumer outputRecordCollector; + + private final Map scyllaStreams; + + private final ScyllaCqlProvider scyllaCqlProvider; + + public ScyllaMessageConsumer(ScyllaConfig scyllaConfig, + ConfiguredAirbyteCatalog configuredCatalog, + Consumer outputRecordCollector) { + this.scyllaConfig = scyllaConfig; + this.outputRecordCollector = outputRecordCollector; + this.scyllaCqlProvider = new ScyllaCqlProvider(scyllaConfig); + var nameTransformer = new ScyllaNameTransformer(scyllaConfig); + this.scyllaStreams = configuredCatalog.getStreams().stream() + .collect(Collectors.toUnmodifiableMap( + AirbyteStreamNameNamespacePair::fromConfiguredAirbyteSteam, + k -> new ScyllaStreamConfig( + nameTransformer.outputKeyspace(k.getStream().getNamespace()), + nameTransformer.outputTable(k.getStream().getName()), + nameTransformer.outputTmpTable(k.getStream().getName()), + k.getDestinationSyncMode()))); + } + + @Override + protected void startTracked() { + scyllaStreams.forEach((k, v) -> { + scyllaCqlProvider.createKeyspaceIfNotExists(v.getKeyspace()); + scyllaCqlProvider.createTableIfNotExists(v.getKeyspace(), v.getTempTableName()); + }); + } + + @Override + protected void acceptTracked(AirbyteMessage message) { + if (message.getType() == AirbyteMessage.Type.RECORD) { + var messageRecord = message.getRecord(); + var streamConfig = + scyllaStreams.get(AirbyteStreamNameNamespacePair.fromRecordMessage(messageRecord)); + if (streamConfig == null) { + throw new IllegalArgumentException("Unrecognized destination stream"); + } + var data = Jsons.serialize(messageRecord.getData()); + scyllaCqlProvider.insert(streamConfig.getKeyspace(), streamConfig.getTempTableName(), data); + } else if (message.getType() == AirbyteMessage.Type.STATE) { + outputRecordCollector.accept(message); + } else { + LOGGER.warn("Unsupported airbyte message type: {}", message.getType()); + } + } + + @Override + protected void close(boolean hasFailed) { + if (!hasFailed) { + scyllaStreams.forEach((k, v) -> { + try { + scyllaCqlProvider.createTableIfNotExists(v.getKeyspace(), v.getTableName()); + switch (v.getDestinationSyncMode()) { + case APPEND -> { + scyllaCqlProvider.copy(v.getKeyspace(), v.getTempTableName(), v.getTableName()); + } + case OVERWRITE -> { + scyllaCqlProvider.truncate(v.getKeyspace(), v.getTableName()); + scyllaCqlProvider.copy(v.getKeyspace(), v.getTempTableName(), v.getTableName()); + } + default -> throw new UnsupportedOperationException("Unsupported destination sync mode"); + } + } catch (Exception e) { + LOGGER.error("Error while copying data to table {}: ", v.getTableName(), e); + } + }); + } + + scyllaStreams.forEach((k, v) -> { + try { + scyllaCqlProvider.dropTableIfExists(v.getKeyspace(), v.getTempTableName()); + } catch (Exception e) { + LOGGER.error("Error while 
deleting temp table {} with reason: ", v.getTempTableName(), e); + } + }); + scyllaCqlProvider.close(); + } + +} diff --git a/airbyte-integrations/connectors/destination-scylla/src/main/java/io/airbyte/integrations/destination/scylla/ScyllaNameTransformer.java b/airbyte-integrations/connectors/destination-scylla/src/main/java/io/airbyte/integrations/destination/scylla/ScyllaNameTransformer.java new file mode 100644 index 0000000..c45a9db --- /dev/null +++ b/airbyte-integrations/connectors/destination-scylla/src/main/java/io/airbyte/integrations/destination/scylla/ScyllaNameTransformer.java @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.scylla; + +import com.google.common.base.CharMatcher; +import io.airbyte.cdk.integrations.destination.StandardNameTransformer; +import io.airbyte.commons.text.Names; + +class ScyllaNameTransformer extends StandardNameTransformer { + + private final ScyllaConfig scyllaConfig; + + public ScyllaNameTransformer(ScyllaConfig scyllaConfig) { + this.scyllaConfig = scyllaConfig; + } + + String outputKeyspace(String namespace) { + if (namespace == null || namespace.isBlank()) { + return scyllaConfig.getKeyspace(); + } + return CharMatcher.is('_').trimLeadingFrom(Names.toAlphanumericAndUnderscore(namespace)); + } + + String outputTable(String streamName) { + var tableName = super.getRawTableName(streamName.toLowerCase()).substring(1); + // max allowed length for a scylla table is 48 characters + return tableName.length() > 48 ? tableName.substring(0, 48) : tableName; + } + + String outputTmpTable(String streamName) { + var tableName = super.getTmpTableName(streamName.toLowerCase()).substring(1); + // max allowed length for a scylla table is 48 characters + return tableName.length() > 48 ? tableName.substring(0, 48) : tableName; + } + + String outputColumn(String columnName) { + return Names.doubleQuote(columnName.toLowerCase()); + } + +} diff --git a/airbyte-integrations/connectors/destination-scylla/src/main/java/io/airbyte/integrations/destination/scylla/ScyllaSessionPool.java b/airbyte-integrations/connectors/destination-scylla/src/main/java/io/airbyte/integrations/destination/scylla/ScyllaSessionPool.java new file mode 100644 index 0000000..e6806dc --- /dev/null +++ b/airbyte-integrations/connectors/destination-scylla/src/main/java/io/airbyte/integrations/destination/scylla/ScyllaSessionPool.java @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.scylla; + +import com.datastax.driver.core.Cluster; +import com.datastax.driver.core.Session; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicInteger; + +class ScyllaSessionPool { + + private static final ConcurrentHashMap> sessions; + + static { + sessions = new ConcurrentHashMap<>(); + } + + private ScyllaSessionPool() { + + } + + static Tuple initSession(ScyllaConfig scyllaConfig) { + var cachedSession = sessions.get(scyllaConfig); + if (cachedSession != null) { + cachedSession.value3().incrementAndGet(); + return Tuple.of(cachedSession.value1(), cachedSession.value2()); + } else { + var cluster = Cluster.builder() + .addContactPoint(scyllaConfig.getAddress()) + .withPort(scyllaConfig.getPort()) + .withCredentials(scyllaConfig.getUsername(), scyllaConfig.getPassword()) + .build(); + var session = cluster.connect(); + sessions.put(scyllaConfig, Triplet.of(cluster, session, new AtomicInteger(1))); + return Tuple.of(cluster, session); + } + } + + static void closeSession(ScyllaConfig scyllaConfig) { + var session = sessions.get(scyllaConfig); + if (session == null) { + throw new IllegalStateException("No session for the provided config"); + } + int usage = session.value3().decrementAndGet(); + if (usage < 1) { + session.value2().close(); + session.value1().close(); + sessions.remove(scyllaConfig); + } + } + +} diff --git a/airbyte-integrations/connectors/destination-scylla/src/main/java/io/airbyte/integrations/destination/scylla/ScyllaStreamConfig.java b/airbyte-integrations/connectors/destination-scylla/src/main/java/io/airbyte/integrations/destination/scylla/ScyllaStreamConfig.java new file mode 100644 index 0000000..6163fca --- /dev/null +++ b/airbyte-integrations/connectors/destination-scylla/src/main/java/io/airbyte/integrations/destination/scylla/ScyllaStreamConfig.java @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.scylla; + +import io.airbyte.protocol.models.v0.DestinationSyncMode; + +/* + * Immutable configuration class for storing destination stream config. 
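+ * Holds the keyspace, final table name, temp table name and destination sync mode resolved for a single configured stream.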
+ */ +class ScyllaStreamConfig { + + private final String keyspace; + + private final String tableName; + + private final String tempTableName; + + private final DestinationSyncMode destinationSyncMode; + + public ScyllaStreamConfig(String keyspace, + String tableName, + String tempTableName, + DestinationSyncMode destinationSyncMode) { + this.keyspace = keyspace; + this.tableName = tableName; + this.tempTableName = tempTableName; + this.destinationSyncMode = destinationSyncMode; + } + + public String getKeyspace() { + return keyspace; + } + + public String getTableName() { + return tableName; + } + + public String getTempTableName() { + return tempTableName; + } + + public DestinationSyncMode getDestinationSyncMode() { + return destinationSyncMode; + } + + @Override + public String toString() { + return "ScyllaStreamConfig{" + + "keyspace='" + keyspace + '\'' + + ", tableName='" + tableName + '\'' + + ", tempTableName='" + tempTableName + '\'' + + ", destinationSyncMode=" + destinationSyncMode + + '}'; + } + +} diff --git a/airbyte-integrations/connectors/destination-scylla/src/main/java/io/airbyte/integrations/destination/scylla/Triplet.java b/airbyte-integrations/connectors/destination-scylla/src/main/java/io/airbyte/integrations/destination/scylla/Triplet.java new file mode 100644 index 0000000..5be53f3 --- /dev/null +++ b/airbyte-integrations/connectors/destination-scylla/src/main/java/io/airbyte/integrations/destination/scylla/Triplet.java @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.scylla; + +public class Triplet { + + private final V1 value1; + + private final V2 value2; + + private final V3 value3; + + public Triplet(V1 value1, V2 value2, V3 value3) { + this.value1 = value1; + this.value2 = value2; + this.value3 = value3; + } + + public static Triplet of(V1 value1, V2 value2, V3 value3) { + return new Triplet<>(value1, value2, value3); + } + + public V1 value1() { + return value1; + } + + public V2 value2() { + return value2; + } + + public V3 value3() { + return value3; + } + + @Override + public String toString() { + return "Triplet{" + + "value1=" + value1 + + ", value2=" + value2 + + ", value3=" + value3 + + '}'; + } + +} diff --git a/airbyte-integrations/connectors/destination-scylla/src/main/java/io/airbyte/integrations/destination/scylla/Tuple.java b/airbyte-integrations/connectors/destination-scylla/src/main/java/io/airbyte/integrations/destination/scylla/Tuple.java new file mode 100644 index 0000000..13f01dd --- /dev/null +++ b/airbyte-integrations/connectors/destination-scylla/src/main/java/io/airbyte/integrations/destination/scylla/Tuple.java @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.scylla; + +public class Tuple { + + private final V1 value1; + + private final V2 value2; + + public Tuple(V1 value1, V2 value2) { + this.value1 = value1; + this.value2 = value2; + } + + public static Tuple of(V1 value1, V2 value2) { + return new Tuple<>(value1, value2); + } + + public V1 value1() { + return value1; + } + + public V2 value2() { + return value2; + } + + @Override + public String toString() { + return "Tuple{" + + "value1=" + value1 + + ", value2=" + value2 + + '}'; + } + +} diff --git a/airbyte-integrations/connectors/destination-scylla/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-scylla/src/main/resources/spec.json new file mode 100644 index 0000000..fb1ea41 --- /dev/null +++ b/airbyte-integrations/connectors/destination-scylla/src/main/resources/spec.json @@ -0,0 +1,57 @@ +{ + "documentationUrl": "https://docs.airbyte.com/integrations/destinations/scylla", + "supportsIncremental": true, + "supportsNormalization": false, + "supportsDBT": false, + "supported_destination_sync_modes": ["overwrite", "append"], + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Scylla Destination Spec", + "type": "object", + "required": ["keyspace", "username", "password", "address", "port"], + "additionalProperties": true, + "properties": { + "keyspace": { + "title": "Keyspace", + "description": "Default Scylla keyspace to create data in.", + "type": "string", + "order": 0 + }, + "username": { + "title": "Username", + "description": "Username to use to access Scylla.", + "type": "string", + "order": 1 + }, + "password": { + "title": "Password", + "description": "Password associated with Scylla.", + "type": "string", + "airbyte_secret": true, + "order": 2 + }, + "address": { + "title": "Address", + "description": "Address to connect to.", + "type": "string", + "order": 3 + }, + "port": { + "title": "Port", + "description": "Port of Scylla.", + "type": "integer", + "minimum": 0, + "maximum": 65536, + "default": 9042, + "order": 4 + }, + "replication": { + "title": "Replication factor", + "type": "integer", + "description": "Indicates to how many nodes the data should be replicated to.", + "default": 1, + "order": 5 + } + } + } +} diff --git a/airbyte-integrations/connectors/destination-scylla/src/test-integration/java/io/airbyte/integrations/destination/scylla/ScyllaContainerInitializr.java b/airbyte-integrations/connectors/destination-scylla/src/test-integration/java/io/airbyte/integrations/destination/scylla/ScyllaContainerInitializr.java new file mode 100644 index 0000000..acfcb20 --- /dev/null +++ b/airbyte-integrations/connectors/destination-scylla/src/test-integration/java/io/airbyte/integrations/destination/scylla/ScyllaContainerInitializr.java @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.scylla; + +import org.testcontainers.containers.GenericContainer; + +class ScyllaContainerInitializr { + + private static ScyllaContainer scyllaContainer; + + private ScyllaContainerInitializr() { + + } + + public static ScyllaContainer initContainer() { + if (scyllaContainer == null) { + scyllaContainer = new ScyllaContainer() + .withExposedPorts(9042) + // single cpu core cluster + .withCommand("--smp 1"); + } + scyllaContainer.start(); + return scyllaContainer; + } + + static class ScyllaContainer extends GenericContainer { + + public ScyllaContainer() { + super("scylladb/scylla:4.5.0"); + } + + } + +} diff --git a/airbyte-integrations/connectors/destination-scylla/src/test-integration/java/io/airbyte/integrations/destination/scylla/ScyllaCqlProviderTest.java b/airbyte-integrations/connectors/destination-scylla/src/test-integration/java/io/airbyte/integrations/destination/scylla/ScyllaCqlProviderTest.java new file mode 100644 index 0000000..dea28fb --- /dev/null +++ b/airbyte-integrations/connectors/destination-scylla/src/test-integration/java/io/airbyte/integrations/destination/scylla/ScyllaCqlProviderTest.java @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.scylla; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import com.datastax.driver.core.exceptions.InvalidQueryException; +import io.airbyte.cdk.integrations.util.HostPortResolver; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; + +@TestInstance(TestInstance.Lifecycle.PER_CLASS) +class ScyllaCqlProviderTest { + + private static final String SCYLLA_KEYSPACE = "scylla_keyspace"; + + private static final String SCYLLA_TABLE = "scylla_table"; + + private ScyllaCqlProvider scyllaCqlProvider; + + private ScyllaNameTransformer nameTransformer; + + @BeforeAll + void setup() { + var scyllaContainer = ScyllaContainerInitializr.initContainer(); + var scyllaConfig = TestDataFactory.scyllaConfig( + HostPortResolver.resolveHost(scyllaContainer), + HostPortResolver.resolvePort(scyllaContainer)); + this.scyllaCqlProvider = new ScyllaCqlProvider(scyllaConfig); + this.nameTransformer = new ScyllaNameTransformer(scyllaConfig); + this.scyllaCqlProvider.createKeyspaceIfNotExists(SCYLLA_KEYSPACE); + this.scyllaCqlProvider.createTableIfNotExists(SCYLLA_KEYSPACE, SCYLLA_TABLE); + } + + @AfterEach + void clean() { + scyllaCqlProvider.truncate(SCYLLA_KEYSPACE, SCYLLA_TABLE); + } + + @Test + void testCreateKeySpaceIfNotExists() { + String keyspace = nameTransformer.outputKeyspace("test_keyspace"); + assertDoesNotThrow(() -> scyllaCqlProvider.createKeyspaceIfNotExists(keyspace)); + } + + @Test + void testCreateTableIfNotExists() { + String table = nameTransformer.outputTable("test_stream"); + assertDoesNotThrow(() -> scyllaCqlProvider.createTableIfNotExists(SCYLLA_KEYSPACE, table)); + } + + @Test + void testInsert() { + // given + scyllaCqlProvider.insert(SCYLLA_KEYSPACE, SCYLLA_TABLE, "{\"property\":\"data1\"}"); + scyllaCqlProvider.insert(SCYLLA_KEYSPACE, SCYLLA_TABLE, "{\"property\":\"data2\"}"); + scyllaCqlProvider.insert(SCYLLA_KEYSPACE, SCYLLA_TABLE, "{\"property\":\"data3\"}"); + + // when + var resultSet = scyllaCqlProvider.select(SCYLLA_KEYSPACE, SCYLLA_TABLE); 
+ + // then + assertThat(resultSet) + .isNotNull() + .hasSize(3) + .anyMatch(r -> r.value2().equals("{\"property\":\"data1\"}")) + .anyMatch(r -> r.value2().equals("{\"property\":\"data2\"}")) + .anyMatch(r -> r.value2().equals("{\"property\":\"data3\"}")); + + } + + @Test + void testTruncate() { + // given + scyllaCqlProvider.insert(SCYLLA_KEYSPACE, SCYLLA_TABLE, "{\"property\":\"data1\"}"); + scyllaCqlProvider.insert(SCYLLA_KEYSPACE, SCYLLA_TABLE, "{\"property\":\"data2\"}"); + scyllaCqlProvider.insert(SCYLLA_KEYSPACE, SCYLLA_TABLE, "{\"property\":\"data3\"}"); + + // when + scyllaCqlProvider.truncate(SCYLLA_KEYSPACE, SCYLLA_TABLE); + var resultSet = scyllaCqlProvider.select(SCYLLA_KEYSPACE, SCYLLA_TABLE); + + // then + assertThat(resultSet) + .isNotNull() + .isEmpty(); + } + + @Test + void testDropTableIfExists() { + // given + String table = nameTransformer.outputTmpTable("test_stream"); + scyllaCqlProvider.createTableIfNotExists(SCYLLA_KEYSPACE, table); + + // when + scyllaCqlProvider.dropTableIfExists(SCYLLA_KEYSPACE, table); + + // then + assertThrows(InvalidQueryException.class, () -> scyllaCqlProvider.select(SCYLLA_KEYSPACE, table)); + } + + @Test + void testCopy() { + // given + String tmpTable = nameTransformer.outputTmpTable("test_stream_copy"); + scyllaCqlProvider.createTableIfNotExists(SCYLLA_KEYSPACE, tmpTable); + scyllaCqlProvider.insert(SCYLLA_KEYSPACE, tmpTable, "{\"property\":\"data1\"}"); + scyllaCqlProvider.insert(SCYLLA_KEYSPACE, tmpTable, "{\"property\":\"data2\"}"); + scyllaCqlProvider.insert(SCYLLA_KEYSPACE, tmpTable, "{\"property\":\"data3\"}"); + + String rawTable = nameTransformer.outputTable("test_stream_copy"); + scyllaCqlProvider.createTableIfNotExists(SCYLLA_KEYSPACE, rawTable); + + // when + scyllaCqlProvider.copy(SCYLLA_KEYSPACE, tmpTable, rawTable); + var resultSet = scyllaCqlProvider.select(SCYLLA_KEYSPACE, rawTable); + + // then + assertThat(resultSet) + .isNotNull() + .hasSize(3) + .anyMatch(r -> r.value2().equals("{\"property\":\"data1\"}")) + .anyMatch(r -> r.value2().equals("{\"property\":\"data2\"}")) + .anyMatch(r -> r.value2().equals("{\"property\":\"data3\"}")); + + } + +} diff --git a/airbyte-integrations/connectors/destination-scylla/src/test-integration/java/io/airbyte/integrations/destination/scylla/ScyllaDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-scylla/src/test-integration/java/io/airbyte/integrations/destination/scylla/ScyllaDestinationAcceptanceTest.java new file mode 100644 index 0000000..d2f9c7d --- /dev/null +++ b/airbyte-integrations/connectors/destination-scylla/src/test-integration/java/io/airbyte/integrations/destination/scylla/ScyllaDestinationAcceptanceTest.java @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.scylla; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.cdk.integrations.standardtest.destination.DestinationAcceptanceTest; +import io.airbyte.cdk.integrations.standardtest.destination.comparator.AdvancedTestDataComparator; +import io.airbyte.cdk.integrations.standardtest.destination.comparator.TestDataComparator; +import io.airbyte.cdk.integrations.util.HostPortResolver; +import io.airbyte.commons.json.Jsons; +import io.airbyte.integrations.destination.scylla.ScyllaContainerInitializr.ScyllaContainer; +import java.util.Comparator; +import java.util.HashSet; +import java.util.List; +import java.util.stream.Collectors; +import org.junit.jupiter.api.BeforeAll; + +class ScyllaDestinationAcceptanceTest extends DestinationAcceptanceTest { + + private JsonNode configJson; + + private ScyllaCqlProvider scyllaCqlProvider; + + private ScyllaNameTransformer nameTransformer; + + private static ScyllaContainer scyllaContainer; + + @Override + protected String getImageName() { + return "airbyte/destination-scylla:dev"; + } + + @BeforeAll + static void initContainer() { + scyllaContainer = ScyllaContainerInitializr.initContainer(); + } + + @Override + protected void setup(final TestDestinationEnv testEnv, final HashSet TEST_SCHEMAS) { + configJson = TestDataFactory.jsonConfig( + HostPortResolver.resolveHost(scyllaContainer), + HostPortResolver.resolvePort(scyllaContainer)); + final var scyllaConfig = new ScyllaConfig(configJson); + this.scyllaCqlProvider = new ScyllaCqlProvider(scyllaConfig); + this.nameTransformer = new ScyllaNameTransformer(scyllaConfig); + } + + @Override + protected void tearDown(final TestDestinationEnv testEnv) { + scyllaCqlProvider.metadata().stream() + .filter(m -> !m.value1().startsWith("system")) + .forEach(meta -> { + final var keyspace = meta.value1(); + meta.value2().forEach(table -> scyllaCqlProvider.truncate(keyspace, table)); + }); + } + + @Override + protected JsonNode getConfig() { + return configJson; + } + + @Override + protected JsonNode getFailCheckConfig() { + return TestDataFactory.jsonConfig("127.129.0.1", 8080); + } + + @Override + protected boolean implementsNamespaces() { + return true; + } + + @Override + protected TestDataComparator getTestDataComparator() { + return new AdvancedTestDataComparator(); + } + + @Override + protected boolean supportBasicDataTypeTest() { + return true; + } + + @Override + protected boolean supportArrayDataTypeTest() { + return true; + } + + @Override + protected boolean supportObjectDataTypeTest() { + return true; + } + + @Override + protected List retrieveRecords(final TestDestinationEnv testEnv, + final String streamName, + final String namespace, + final JsonNode streamSchema) { + final var keyspace = nameTransformer.outputKeyspace(namespace); + final var table = nameTransformer.outputTable(streamName); + return scyllaCqlProvider.select(keyspace, table).stream() + .sorted(Comparator.comparing(Triplet::value3)) + .map(Triplet::value2) + .map(Jsons::deserialize) + .collect(Collectors.toList()); + } + +} diff --git a/airbyte-integrations/connectors/destination-scylla/src/test-integration/java/io/airbyte/integrations/destination/scylla/ScyllaDestinationTest.java b/airbyte-integrations/connectors/destination-scylla/src/test-integration/java/io/airbyte/integrations/destination/scylla/ScyllaDestinationTest.java new file mode 100644 index 0000000..53460b3 --- /dev/null +++ 
b/airbyte-integrations/connectors/destination-scylla/src/test-integration/java/io/airbyte/integrations/destination/scylla/ScyllaDestinationTest.java @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.scylla; + +import static org.assertj.core.api.Assertions.assertThat; + +import io.airbyte.cdk.integrations.util.HostPortResolver; +import io.airbyte.integrations.destination.scylla.ScyllaContainerInitializr.ScyllaContainer; +import io.airbyte.protocol.models.v0.AirbyteConnectionStatus; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; + +@TestInstance(TestInstance.Lifecycle.PER_CLASS) +class ScyllaDestinationTest { + + private ScyllaDestination scyllaDestination; + + private ScyllaContainer scyllaContainer; + + @BeforeAll + void setup() { + this.scyllaContainer = ScyllaContainerInitializr.initContainer(); + this.scyllaDestination = new ScyllaDestination(); + } + + @Test + void testCheckWithStatusSucceeded() { + + var jsonConfiguration = TestDataFactory.jsonConfig( + HostPortResolver.resolveHost(scyllaContainer), + HostPortResolver.resolvePort(scyllaContainer)); + + var connectionStatus = scyllaDestination.check(jsonConfiguration); + + assertThat(connectionStatus.getStatus()).isEqualTo(AirbyteConnectionStatus.Status.SUCCEEDED); + } + + @Test + void testCheckWithStatusFailed() { + + var jsonConfiguration = TestDataFactory.jsonConfig("192.0.2.1", 8080); + + var connectionStatus = scyllaDestination.check(jsonConfiguration); + + assertThat(connectionStatus.getStatus()).isEqualTo(AirbyteConnectionStatus.Status.FAILED); + + } + +} diff --git a/airbyte-integrations/connectors/destination-scylla/src/test-integration/java/io/airbyte/integrations/destination/scylla/TestDataFactory.java b/airbyte-integrations/connectors/destination-scylla/src/test-integration/java/io/airbyte/integrations/destination/scylla/TestDataFactory.java new file mode 100644 index 0000000..4773a87 --- /dev/null +++ b/airbyte-integrations/connectors/destination-scylla/src/test-integration/java/io/airbyte/integrations/destination/scylla/TestDataFactory.java @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.scylla; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import io.airbyte.commons.json.Jsons; + +class TestDataFactory { + + private TestDataFactory() { + + } + + static ScyllaConfig scyllaConfig(String address, int port) { + return new ScyllaConfig( + "default_keyspace", + "usr", + "pw", + address, + port, + 2); + } + + static JsonNode jsonConfig(String address, int port) { + return Jsons.jsonNode(ImmutableMap.builder() + .put("keyspace", "default_keyspace") + .put("username", "usr") + .put("password", "pw") + .put("address", address) + .put("port", port) + .put("replication", 2) + .build()); + } + +} diff --git a/airbyte-integrations/connectors/destination-scylla/src/test/java/io/airbyte/integrations/destination/scylla/ScyllaConfigTest.java b/airbyte-integrations/connectors/destination-scylla/src/test/java/io/airbyte/integrations/destination/scylla/ScyllaConfigTest.java new file mode 100644 index 0000000..cb1bbce --- /dev/null +++ b/airbyte-integrations/connectors/destination-scylla/src/test/java/io/airbyte/integrations/destination/scylla/ScyllaConfigTest.java @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.scylla; + +import static org.assertj.core.api.Assertions.assertThat; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +class ScyllaConfigTest { + + private ScyllaConfig scyllaConfig; + + @BeforeEach + void setup() { + var jsonNode = TestDataFactory.jsonConfig("127.0.0.1", 9042); + this.scyllaConfig = new ScyllaConfig(jsonNode); + } + + @Test + void testConfig() { + + assertThat(scyllaConfig) + .hasFieldOrPropertyWithValue("keyspace", "default_keyspace") + .hasFieldOrPropertyWithValue("username", "usr") + .hasFieldOrPropertyWithValue("password", "pw") + .hasFieldOrPropertyWithValue("address", "127.0.0.1") + .hasFieldOrPropertyWithValue("port", 9042) + .hasFieldOrPropertyWithValue("replication", 2); + + } + +} diff --git a/airbyte-integrations/connectors/destination-scylla/src/test/java/io/airbyte/integrations/destination/scylla/ScyllaNameTransformerTest.java b/airbyte-integrations/connectors/destination-scylla/src/test/java/io/airbyte/integrations/destination/scylla/ScyllaNameTransformerTest.java new file mode 100644 index 0000000..f269168 --- /dev/null +++ b/airbyte-integrations/connectors/destination-scylla/src/test/java/io/airbyte/integrations/destination/scylla/ScyllaNameTransformerTest.java @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.scylla; + +import static org.assertj.core.api.Assertions.assertThat; + +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; + +@TestInstance(TestInstance.Lifecycle.PER_CLASS) +class ScyllaNameTransformerTest { + + private ScyllaNameTransformer scyllaNameTransformer; + + @BeforeAll + void setup() { + var scyllaConfig = TestDataFactory.scyllaConfig("127.0.0.1", 9042); + this.scyllaNameTransformer = new ScyllaNameTransformer(scyllaConfig); + } + + @Test + void testOutputTable() { + + var table = scyllaNameTransformer.outputTable("stream_name"); + + assertThat(table).matches("airbyte_raw_stream_name"); + + } + + @Test + void testOutputTmpTable() { + + var table = scyllaNameTransformer.outputTmpTable("stream_name"); + + assertThat(table).matches("airbyte_tmp_+[a-z]+_stream_name"); + + } + + @Test + void testOutputKeyspace() { + + var keyspace = scyllaNameTransformer.outputKeyspace("***keyspace^h"); + + assertThat(keyspace).matches("keyspace_h"); + + } + + @Test + void outputColumn() { + + var column = scyllaNameTransformer.outputColumn("_airbyte_data"); + + assertThat(column).matches("\"_airbyte_data\""); + + } + +} diff --git a/airbyte-integrations/connectors/destination-scylla/src/test/java/io/airbyte/integrations/destination/scylla/ScyllaRecordConsumerTest.java b/airbyte-integrations/connectors/destination-scylla/src/test/java/io/airbyte/integrations/destination/scylla/ScyllaRecordConsumerTest.java new file mode 100644 index 0000000..7dcf527 --- /dev/null +++ b/airbyte-integrations/connectors/destination-scylla/src/test/java/io/airbyte/integrations/destination/scylla/ScyllaRecordConsumerTest.java @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.scylla; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.cdk.integrations.base.FailureTrackingAirbyteMessageConsumer; +import io.airbyte.cdk.integrations.standardtest.destination.PerStreamStateMessageTest; +import io.airbyte.cdk.integrations.util.HostPortResolver; +import io.airbyte.protocol.models.v0.AirbyteMessage; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import java.util.function.Consumer; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.testcontainers.containers.GenericContainer; + +@DisplayName("ScyllaRecordConsumer") +@ExtendWith(MockitoExtension.class) +public class ScyllaRecordConsumerTest extends PerStreamStateMessageTest { + + private static ScyllaContainer scyllaContainer; + + @Mock + private Consumer outputRecordCollector; + + private ScyllaMessageConsumer consumer; + + @Mock + ScyllaConfig scyllaConfig; + + @Mock + private ConfiguredAirbyteCatalog configuredCatalog; + + public static ScyllaContainer initContainer() { + if (scyllaContainer == null) { + scyllaContainer = new ScyllaContainer() + .withExposedPorts(9042) + // single cpu core cluster + .withCommand("--smp 1"); + } + scyllaContainer.start(); + return scyllaContainer; + } + + @BeforeEach + public void init() { + ScyllaContainer scyllaContainer = initContainer(); + JsonNode configJson = TestDataFactory.jsonConfig( + HostPortResolver.resolveHost(scyllaContainer), + HostPortResolver.resolvePort(scyllaContainer)); + var scyllaConfig = new ScyllaConfig(configJson); + consumer = new ScyllaMessageConsumer(scyllaConfig, configuredCatalog, outputRecordCollector); + } + + @Override + protected Consumer getMockedConsumer() { + return outputRecordCollector; + } + + @Override + protected FailureTrackingAirbyteMessageConsumer getMessageConsumer() { + return consumer; + } + + static class ScyllaContainer extends GenericContainer { + + public ScyllaContainer() { + super("scylladb/scylla:4.5.0"); + } + + } + +} diff --git a/airbyte-integrations/connectors/destination-scylla/src/test/java/io/airbyte/integrations/destination/scylla/TestDataFactory.java b/airbyte-integrations/connectors/destination-scylla/src/test/java/io/airbyte/integrations/destination/scylla/TestDataFactory.java new file mode 100644 index 0000000..78bd678 --- /dev/null +++ b/airbyte-integrations/connectors/destination-scylla/src/test/java/io/airbyte/integrations/destination/scylla/TestDataFactory.java @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.scylla; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import io.airbyte.commons.json.Jsons; + +class TestDataFactory { + + static JsonNode jsonConfig(String address, int port) { + return Jsons.jsonNode(ImmutableMap.builder() + .put("keyspace", "default_keyspace") + .put("username", "usr") + .put("password", "pw") + .put("address", address) + .put("port", port) + .put("replication", 2) + .build()); + } + + static ScyllaConfig scyllaConfig(String address, int port) { + return new ScyllaConfig( + "default_keyspace", + "usr", + "pw", + address, + port, + 2); + } + +} diff --git a/airbyte-integrations/connectors/destination-selectdb/README.md b/airbyte-integrations/connectors/destination-selectdb/README.md new file mode 100644 index 0000000..cca3da0 --- /dev/null +++ b/airbyte-integrations/connectors/destination-selectdb/README.md @@ -0,0 +1,72 @@ +# Destination Selectdb + +This is the repository for the Selectdb destination connector in Java. +For information about how to use this connector within Airbyte, see [the User Documentation](https://docs.airbyte.com/integrations/destinations/selectdb). + +## Local development + +#### Building via Gradle +From the Airbyte repository root, run: +``` +./gradlew :airbyte-integrations:connectors:destination-selectdb:build +``` + +#### Create credentials +**If you are a community contributor**, generate the necessary credentials and place them in `secrets/config.json` conforming to the spec file in `src/main/resources/spec.json`. +Note that the `secrets` directory is git-ignored by default, so there is no danger of accidentally checking in sensitive information. + +**If you are an Airbyte core member**, follow the [instructions](https://docs.airbyte.com/connector-development#using-credentials-in-ci) to set up the credentials. + +### Locally running the connector docker image + +#### Build +Build the connector image via Gradle: + +``` +./gradlew :airbyte-integrations:connectors:destination-selectdb:buildConnectorImage +``` +Once built, the docker image name and tag on your host will be `airbyte/destination-selectdb:dev`. +the Dockerfile. + +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/destination-selectdb:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-selectdb:dev check --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-selectdb:dev discover --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/destination-selectdb:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json +``` + +## Testing +We use `JUnit` for Java tests. + +### Unit and Integration Tests +Place unit tests under `src/test/io/airbyte/integrations/destinations/selectdb`. + +#### Acceptance Tests +Airbyte has a standard test suite that all destination connectors must pass. Implement the `TODO`s in +`src/test-integration/java/io/airbyte/integrations/destinations/selectdbDestinationAcceptanceTest.java`. + +### Using gradle to run tests +All commands should be run from airbyte project root. 
+To run unit tests: +``` +./gradlew :airbyte-integrations:connectors:destination-selectdb:unitTest +``` +To run acceptance and custom integration tests: +``` +./gradlew :airbyte-integrations:connectors:destination-selectdb:integrationTest +``` + +## Dependency Management + +### Publishing a new version of the connector +You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what? +1. Make sure your changes are passing our test suite: `airbyte-ci connectors --name=destination-selectdb test` +2. Bump the connector version in `metadata.yaml`: increment the `dockerImageTag` value. Please follow [semantic versioning for connectors](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#semantic-versioning-for-connectors). +3. Make sure the `metadata.yaml` content is up to date. +4. Make the connector documentation and its changelog is up to date (`docs/integrations/destinations/selectdb.md`). +5. Create a Pull Request: use [our PR naming conventions](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#pull-request-title-convention). +6. Pat yourself on the back for being an awesome contributor. +7. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master. + diff --git a/airbyte-integrations/connectors/destination-selectdb/bootstrap.md b/airbyte-integrations/connectors/destination-selectdb/bootstrap.md new file mode 100644 index 0000000..7303dcb --- /dev/null +++ b/airbyte-integrations/connectors/destination-selectdb/bootstrap.md @@ -0,0 +1,41 @@ +# SelectDB destination + + +SelectDB destination adopts MySQL protocol(JDBC) and copy into to exchange data. + +1. JDBC is used to manipulate the data table structure and execute the create table statement before data import +2. Copy Into is an import method based on Object storage, For SelectDB destination, first upload csv file into selectdb internal stage, and then copy into SelectDB with transaction operation. + +## Introduction to SelectDB + +SelectDB is a cloud-native realtime data warehouse built by the core developers of Apache Doris based on the Apache Doris open source project. +[SelectDB](https://en.selectdb.com/docs/selectdb) + +### Core Features + +- **Extremely fast** : In terms of storage, it adopts efficient columnar storage and data indexing; in terms of computing, it relies on the MPP distributed computing architecture and the vectorized execution engine optimized for X64 and ARM64; in the ClickBench public performance evaluation, it is at the world's leading level. +- **Single unified** : It can run multiple analytical workloads on a single system. It supports real-time/interactive/batch computing types, structured/semi-structured data types, and federated querying with external data lakes and databases. +- **Easy to use** : Compatible with MySQL network protocols; powerful and easy-to-use WebUI-based database management tools, and rich connectors for integration with Spark/Flink/dbt/Kafka. +- **Cost-effective** : Deeply adapted to the cloud platforms, and adopts an implementation architecture that separates storage and computing. In terms of computing, it provides on-demand automatic expansion and contraction, and the storage adopts tiered storage of hot and cold data. +- **Open** : It is developed based on the open source Apache Doris, and data can be freely migrated with Doris. Runs on multiple clouds and provides a consistent user experience. 
+- **Enterprise-grade features** : provides user authentication and access control, data protection and backup. In the future, it will also provide data masking, finer-grained authority control, and data lineage to meet the needs of data governance. + + +### Difference with Apache Doris + +SelectDB is developed based on the Apache Doris. SelectDB will continue to work with the Doris community to strengthen the open source kernel. At the same time, SelectDB also provides the following enhanced features and services for enterprise customers. +- **Apache Doris LTS version** : Provides up to 18 months of Apache Doris LTS version to meet the needs of enterprises for stronger stability of Doris. This version is free and the code is open source. +- **Cloud-native kernel** : In addition to the enhancement of the open source Doris kernel, it also provides a deeply adapted cloud-native kernel for public cloud platforms, so as to provide enterprises with best price / performance and enterprise-grade features. +- **Native management tools** : provides powerful and easy-to-use web-based database management and development tools. It can be used to replace tools like Navicat. +- **Professional technical support** : Professional technical support services are provided for open source Apache Doris and SelectDB products. + +### Two Product Editions + +According to the needs of different enterprises, there are currently two editions for SelectDB: + +- **SelectDB Cloud:** A fully managed data warehouse as a service on public clouds. +- **SelectDB Enterprise:** Delivered as on-premises software, deployed in your IDC or VPC of public cloud. + +SelectDB 1.0 was opened for trial application in July 2022, and dozens of companies have already tried it. + +The SelectDB 2.0 preview is now open for trial application. Starting from SelectDB 2.0, SelectDB has also officially launched the international site. If customers want to use AWS, Azure and GCP, please visit SelectDB International Station; if customers want to use Alibaba Cloud, Tencent Cloud and Huawei Cloud, please visit SelectDB China Station. 
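+
+### Import flow sketch
+
+For reference, the two-step import described at the top of this document can be exercised directly over HTTP. The snippet below is a minimal, self-contained sketch (not the connector's actual code path); the host, credentials, database, table, and file names are illustrative placeholders.
+
+```java
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Base64;
+import org.apache.http.client.methods.CloseableHttpResponse;
+import org.apache.http.client.methods.HttpPost;
+import org.apache.http.client.methods.HttpPut;
+import org.apache.http.entity.ByteArrayEntity;
+import org.apache.http.entity.StringEntity;
+import org.apache.http.impl.client.CloseableHttpClient;
+import org.apache.http.impl.client.HttpClients;
+
+public class CopyIntoSketch {
+
+  public static void main(String[] args) throws Exception {
+    String loadHost = "example.privatelink.aliyun.com:47057"; // "load_url" from the connector config
+    String auth = "Basic " + Base64.getEncoder()
+        .encodeToString("admin:password".getBytes(StandardCharsets.UTF_8));
+    Path csv = Path.of("/tmp/airbyte_users.csv");
+
+    // Redirects are handled manually so the 307 upload location can be read.
+    try (CloseableHttpClient client = HttpClients.custom().disableRedirectHandling().build()) {
+      // 1. Ask the load endpoint for an upload location on the internal stage (HTTP 307 + Location header).
+      HttpPut reserve = new HttpPut("http://" + loadHost + "/copy/upload");
+      reserve.setHeader("fileName", "airbyte_users.csv");
+      reserve.setHeader("Authorization", auth);
+      String uploadUrl;
+      try (CloseableHttpResponse response = client.execute(reserve)) {
+        uploadUrl = response.getFirstHeader("location").getValue();
+      }
+
+      // 2. Upload the CSV bytes to the redirected object-storage URL.
+      HttpPut upload = new HttpPut(uploadUrl);
+      upload.setEntity(new ByteArrayEntity(Files.readAllBytes(csv)));
+      client.execute(upload).close();
+
+      // 3. Commit the staged file into the target table with a synchronous COPY INTO.
+      String sql = "COPY INTO `db`.`users` FROM @~('{airbyte_users.csv}') PROPERTIES ('copy.async'='false')";
+      HttpPost copy = new HttpPost("http://" + loadHost + "/copy/query");
+      copy.setHeader("Authorization", auth);
+      copy.setHeader("Content-Type", "application/json");
+      copy.setEntity(new StringEntity("{\"sql\": \"" + sql + "\"}"));
+      client.execute(copy).close();
+    }
+  }
+
+}
+```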
\ No newline at end of file diff --git a/airbyte-integrations/connectors/destination-selectdb/build.gradle b/airbyte-integrations/connectors/destination-selectdb/build.gradle new file mode 100644 index 0000000..0a654ec --- /dev/null +++ b/airbyte-integrations/connectors/destination-selectdb/build.gradle @@ -0,0 +1,28 @@ +plugins { + id 'application' + id 'airbyte-java-connector' +} + +airbyteJavaConnector { + cdkVersionRequired = '0.2.0' + features = ['db-destinations'] + useLocalCdk = false +} + +//remove once upgrading the CDK version to 0.4.x or later +java { + compileJava { + options.compilerArgs.remove("-Werror") + } +} + +airbyteJavaConnector.addCdkDependencies() + +application { + mainClass = 'io.airbyte.integrations.destination.selectdb.SelectdbDestination' +} + +dependencies { + implementation 'org.apache.commons:commons-csv:1.4' + implementation group: 'mysql', name: 'mysql-connector-java', version: '8.0.16' +} diff --git a/airbyte-integrations/connectors/destination-selectdb/icon.svg b/airbyte-integrations/connectors/destination-selectdb/icon.svg new file mode 100644 index 0000000..9e0e93c --- /dev/null +++ b/airbyte-integrations/connectors/destination-selectdb/icon.svg @@ -0,0 +1,21 @@ + + + + + + + + diff --git a/airbyte-integrations/connectors/destination-selectdb/metadata.yaml b/airbyte-integrations/connectors/destination-selectdb/metadata.yaml new file mode 100644 index 0000000..ec29978 --- /dev/null +++ b/airbyte-integrations/connectors/destination-selectdb/metadata.yaml @@ -0,0 +1,24 @@ +data: + connectorType: destination + definitionId: 50a559a7-6323-4e33-8aa0-51dfd9dfadac + dockerImageTag: 0.1.0 + connectorSubtype: database + dockerRepository: airbyte/destination-selectdb + githubIssueLabel: destination-selectdb + icon: selectdb.svg + license: MIT + name: SelectDB + registries: + cloud: + enabled: false + oss: + enabled: false + releaseStage: alpha + documentationUrl: https://docs.airbyte.com/integrations/destinations/selectdb + tags: + - language:java + ab_internal: + sl: 100 + ql: 100 + supportLevel: archived +metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/destination-selectdb/src/main/java/io/airbyte/integrations/destination/selectdb/BaseResponse.java b/airbyte-integrations/connectors/destination-selectdb/src/main/java/io/airbyte/integrations/destination/selectdb/BaseResponse.java new file mode 100644 index 0000000..864328c --- /dev/null +++ b/airbyte-integrations/connectors/destination-selectdb/src/main/java/io/airbyte/integrations/destination/selectdb/BaseResponse.java @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.selectdb; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; + +@JsonIgnoreProperties(ignoreUnknown = true) +public class BaseResponse { + + private int code; + private String msg; + private T data; + private int count; + + public int getCode() { + return code; + } + + public String getMsg() { + return msg; + } + + public T getData() { + return data; + } + +} diff --git a/airbyte-integrations/connectors/destination-selectdb/src/main/java/io/airbyte/integrations/destination/selectdb/CopyIntoResp.java b/airbyte-integrations/connectors/destination-selectdb/src/main/java/io/airbyte/integrations/destination/selectdb/CopyIntoResp.java new file mode 100644 index 0000000..dc2c18f --- /dev/null +++ b/airbyte-integrations/connectors/destination-selectdb/src/main/java/io/airbyte/integrations/destination/selectdb/CopyIntoResp.java @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.selectdb; + +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. + */ + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import java.util.Map; + +@JsonIgnoreProperties(ignoreUnknown = true) +public class CopyIntoResp extends BaseResponse { + + private String code; + private String exception; + private Map result; + + public String getDataCode() { + return code; + } + + public String getException() { + return exception; + } + + public Map getResult() { + return result; + } + +} diff --git a/airbyte-integrations/connectors/destination-selectdb/src/main/java/io/airbyte/integrations/destination/selectdb/LabelInfo.java b/airbyte-integrations/connectors/destination-selectdb/src/main/java/io/airbyte/integrations/destination/selectdb/LabelInfo.java new file mode 100644 index 0000000..aff5f0c --- /dev/null +++ b/airbyte-integrations/connectors/destination-selectdb/src/main/java/io/airbyte/integrations/destination/selectdb/LabelInfo.java @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.selectdb; + +import java.util.UUID; + +public class LabelInfo { + + private String prefix; + + private String table; + + public LabelInfo(String labelPrefix, String table) { + this.prefix = labelPrefix; + this.table = table; + } + + public String label() { + return prefix + "_" + table + "_" + UUID.randomUUID() + System.currentTimeMillis(); + } + +} diff --git a/airbyte-integrations/connectors/destination-selectdb/src/main/java/io/airbyte/integrations/destination/selectdb/SelectdbConnectionOptions.java b/airbyte-integrations/connectors/destination-selectdb/src/main/java/io/airbyte/integrations/destination/selectdb/SelectdbConnectionOptions.java new file mode 100644 index 0000000..1f3ab90 --- /dev/null +++ b/airbyte-integrations/connectors/destination-selectdb/src/main/java/io/airbyte/integrations/destination/selectdb/SelectdbConnectionOptions.java @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.selectdb; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.base.Preconditions; + +public class SelectdbConnectionOptions { + + protected final String driverName = "com.mysql.jdbc.Driver"; + protected final String cjDriverName = "com.mysql.cj.jdbc.Driver"; + + private String db; + private static String DB_KEY = "database"; + + private String table; + private static final String TABLE_KEY = "table"; + + private String user; + private static final String USER_KEY = "user_name"; + + private String pwd; + private static final String PWD_KEY = "password"; + + private String loadUrl; + private static final String LOAD_URL_KEY = "load_url"; + + private String jdbcUrl; + private static final String JDBC_URL_KEY = "jdbc_url"; + + private String clusterName; + private static final String CLUSTER_NAME_KEY = "cluster_name"; + + public static SelectdbConnectionOptions getSelectdbConnection(final JsonNode config, String table) { + return new SelectdbConnectionOptions( + config.get(DB_KEY).asText(), + table, + config.get(LOAD_URL_KEY).asText(), + config.get(JDBC_URL_KEY).asText(), + config.get(CLUSTER_NAME_KEY).asText(), + config.get(USER_KEY).asText(), + config.get(PWD_KEY) == null ? "" : config.get(PWD_KEY).asText()); + + } + + public SelectdbConnectionOptions(String db, + String table, + String loadUrl, + String jdbcUrl, + String clusterName, + String username, + String password) { + this.db = db; + this.table = table; + this.loadUrl = Preconditions.checkNotNull(loadUrl, "loadUrl is empty"); + this.jdbcUrl = Preconditions.checkNotNull(jdbcUrl, "jdbcUrl is empty"); + this.clusterName = Preconditions.checkNotNull(clusterName, "clusterName is empty"); + this.user = username; + this.pwd = password; + } + + public String getLoadUrl() { + return loadUrl; + } + + public String getJdbcUrl() { + return jdbcUrl; + } + + public String getClusterName() { + return clusterName; + } + + public String getDb() { + return db; + } + + public String getTable() { + return table; + } + + public String getUser() { + return user; + } + + public String getPwd() { + return pwd; + } + + public String getCjDriverName() { + return cjDriverName; + } + + public String getDriverName() { + return driverName; + } + + @Override + public String toString() { + return "SelectdbConnectionOptions{" + + "driverName='" + driverName + '\'' + + ", cjDriverName='" + cjDriverName + '\'' + + ", db='" + db + '\'' + + ", table='" + table + '\'' + + ", user='" + user + '\'' + + ", pwd='" + pwd + '\'' + + ", loadUrl='" + loadUrl + '\'' + + ", jdbcUrl='" + jdbcUrl + '\'' + + ", clusterName='" + clusterName + '\'' + + '}'; + } + +} diff --git a/airbyte-integrations/connectors/destination-selectdb/src/main/java/io/airbyte/integrations/destination/selectdb/SelectdbConsumer.java b/airbyte-integrations/connectors/destination-selectdb/src/main/java/io/airbyte/integrations/destination/selectdb/SelectdbConsumer.java new file mode 100644 index 0000000..c14c7b2 --- /dev/null +++ b/airbyte-integrations/connectors/destination-selectdb/src/main/java/io/airbyte/integrations/destination/selectdb/SelectdbConsumer.java @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.selectdb; + +import com.fasterxml.jackson.core.io.JsonStringEncoder; +import io.airbyte.cdk.integrations.base.CommitOnStateAirbyteMessageConsumer; +import io.airbyte.commons.json.Jsons; +import io.airbyte.protocol.models.v0.AirbyteMessage; +import io.airbyte.protocol.models.v0.AirbyteRecordMessage; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import java.io.IOException; +import java.nio.file.Files; +import java.util.Map; +import java.util.UUID; +import java.util.function.Consumer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class SelectdbConsumer extends CommitOnStateAirbyteMessageConsumer { + + private static final Logger LOGGER = LoggerFactory.getLogger(SelectdbConsumer.class); + + private final ConfiguredAirbyteCatalog catalog; + private final Map writeConfigs; + + private JsonStringEncoder jsonEncoder; + + public SelectdbConsumer( + final Map writeConfigs, + final ConfiguredAirbyteCatalog catalog, + final Consumer outputRecordCollector) { + super(outputRecordCollector); + jsonEncoder = JsonStringEncoder.getInstance(); + this.catalog = catalog; + this.writeConfigs = writeConfigs; + LOGGER.info("initializing SelectdbConsumer."); + } + + @Override + public void commit() throws Exception { + for (final SelectdbWriteConfig writeConfig : writeConfigs.values()) { + writeConfig.getWriter().flush(); + } + } + + @Override + protected void startTracked() throws Exception {} + + @Override + protected void acceptTracked(AirbyteMessage msg) throws Exception { + if (msg.getType() != AirbyteMessage.Type.RECORD) { + return; + } + final AirbyteRecordMessage recordMessage = msg.getRecord(); + if (!writeConfigs.containsKey(recordMessage.getStream())) { + throw new IllegalArgumentException( + String.format( + "Message contained record from a stream that was not in the catalog. 
\ncatalog: %s , \nmessage: %s", + Jsons.serialize(catalog), Jsons.serialize(recordMessage))); + } + + writeConfigs.get(recordMessage.getStream()).getWriter().printRecord( + UUID.randomUUID(), + // new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS").format(new Date(recordMessage.getEmittedAt())), + recordMessage.getEmittedAt(), + new String(jsonEncoder.quoteAsString(Jsons.serialize(recordMessage.getData())))); + + } + + @Override + protected void close(boolean hasFailed) throws Exception { + LOGGER.info("finalizing SelectdbConsumer"); + for (final Map.Entry entries : writeConfigs.entrySet()) { + try { + entries.getValue().getWriter().flush(); + entries.getValue().getWriter().close(); + } catch (final Exception e) { + hasFailed = true; + LOGGER.error("failed to close writer for: {}", entries.getKey()); + } + } + + try { + for (final SelectdbWriteConfig value : writeConfigs.values()) { + value.getsci().firstCommit(); + } + } catch (final Exception e) { + hasFailed = true; + final String message = "Failed to upload selectdb stage in destination: "; + LOGGER.error(message + e.getMessage()); + } + try { + if (!hasFailed) { + for (final SelectdbWriteConfig writeConfig : writeConfigs.values()) { + if (writeConfig.getsci().isUpload()) { + writeConfig.getsci().commitTransaction(); + } + LOGGER.info("upload commit (temp file: {} ) successed ", writeConfig.getsci().getPath()); + } + } else { + final String message = "Failed to copy into selectdb in destination"; + LOGGER.error(message); + throw new IOException(message); + } + } finally { + for (final SelectdbWriteConfig writeConfig : writeConfigs.values()) { + Files.deleteIfExists(writeConfig.getsci().getPath()); + writeConfig.getsci().close(); + } + } + + } + +} diff --git a/airbyte-integrations/connectors/destination-selectdb/src/main/java/io/airbyte/integrations/destination/selectdb/SelectdbCopyInto.java b/airbyte-integrations/connectors/destination-selectdb/src/main/java/io/airbyte/integrations/destination/selectdb/SelectdbCopyInto.java new file mode 100644 index 0000000..a3c5f95 --- /dev/null +++ b/airbyte-integrations/connectors/destination-selectdb/src/main/java/io/airbyte/integrations/destination/selectdb/SelectdbCopyInto.java @@ -0,0 +1,261 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.selectdb; + +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.base.Preconditions; +import io.airbyte.integrations.destination.selectdb.exception.CopyIntoException; +import io.airbyte.integrations.destination.selectdb.exception.UploadException; +import io.airbyte.integrations.destination.selectdb.http.HttpPostBuilder; +import io.airbyte.integrations.destination.selectdb.http.HttpPutBuilder; +import io.airbyte.integrations.destination.selectdb.utils.ResponseUtils; +import java.io.FileInputStream; +import java.io.IOException; +import java.nio.file.Path; +import java.util.*; +import org.apache.http.Header; +import org.apache.http.HttpEntity; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.entity.ByteArrayEntity; +import org.apache.http.entity.StringEntity; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.util.EntityUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class SelectdbCopyInto { + + private static final Logger LOGGER = LoggerFactory.getLogger(SelectdbCopyInto.class); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static final String UPLOAD_URL_PATTERN = "http://%s/copy/upload"; + private static final String COPY_URL_PATTERN = "http://%s/copy/query"; + public static final Character CSV_COLUMN_SEPARATOR = '\t'; + + private final String tableName; + private final String db; + private final String clusterName; + private final String loadUrl; + private final String uploadUrlStr; + private final String jdbcUrlStr; + private final String user; + private final String passwd; + private final Integer maxRetry; + private Boolean isUpload = false; + private final Path path; + private final CloseableHttpClient httpClient; + + private static final int SUCCESS = 0; + private static final String FAIL = "1"; + + private final static String COPY_SYNC = "copy.async"; + private String COPY_INTO_SQL = ""; + private String internalSatgeFileName = ""; + private Properties copyIntoSqlProp; + + public SelectdbCopyInto( + Path path, + SelectdbConnectionOptions selectdbOptions, + LabelInfo labelInfo, + CloseableHttpClient httpClient, + String... 
head) { + this.loadUrl = selectdbOptions.getLoadUrl(); + this.db = selectdbOptions.getDb(); + this.tableName = selectdbOptions.getTable(); + this.clusterName = selectdbOptions.getClusterName(); + this.user = selectdbOptions.getUser(); + this.passwd = selectdbOptions.getPwd(); + this.uploadUrlStr = String.format(UPLOAD_URL_PATTERN, loadUrl); + this.jdbcUrlStr = String.format(COPY_URL_PATTERN, loadUrl); + this.copyIntoSqlProp = new Properties(); + this.maxRetry = 3; + this.path = path; + this.httpClient = httpClient; + + this.internalSatgeFileName = labelInfo.label() + ".csv"; + List files = new ArrayList<>(); + files.add(this.internalSatgeFileName); + this.COPY_INTO_SQL = buildCopyIntoSql(files); + } + + public void firstCommit() throws IOException { + Path pathChecked = Preconditions.checkNotNull(path, "upload temp CSV file is empty."); + String uploadAddress = getUploadAddress(); + LOGGER.info("redirect to s3 address:{}", uploadAddress); + try { + HttpPutBuilder putBuilder = new HttpPutBuilder(); + putBuilder.setUrl(uploadAddress) + .setCommonHeader() + .setEntity(new ByteArrayEntity(new FileInputStream(pathChecked.toFile()).readAllBytes())); + + CloseableHttpResponse execute = httpClient.execute(putBuilder.build()); + handlePreCommitResponse(execute); + } catch (Exception e) { + throw new UploadException(e); + } + this.isUpload = true; + } + + private String getUploadAddress() throws IOException { + HttpPutBuilder putBuilder = new HttpPutBuilder(); + putBuilder.setUrl(uploadUrlStr) + .setFileName(this.internalSatgeFileName) + .setCommonHeader() + .setEmptyEntity() + .baseAuth(user, passwd); + + try (CloseableHttpResponse execute = httpClient.execute(putBuilder.build())) { + int statusCode = execute.getStatusLine().getStatusCode(); + String reason = execute.getStatusLine().getReasonPhrase(); + if (statusCode == 307) { + Header location = execute.getFirstHeader("location"); + return location.getValue(); + } else { + HttpEntity entity = execute.getEntity(); + String result = entity == null ? 
null : EntityUtils.toString(entity); + LOGGER.error("Failed get the redirected address, status {}, reason {}, response {}", statusCode, reason, + result); + throw new RuntimeException("Could not get the redirected address."); + } + } + } + + public Boolean isUpload() { + return this.isUpload; + } + + private String buildCopyIntoSql(List fileList) { + StringBuilder sb = new StringBuilder(); + sb.append("COPY INTO `") + .append(db) + .append("`.`") + .append(tableName) + .append("` FROM @~('{").append(String.join(",", fileList)).append("}') ") + .append("PROPERTIES ("); + + // this copy into is sync + copyIntoSqlProp.put(COPY_SYNC, false); + StringJoiner props = new StringJoiner(","); + for (Map.Entry entry : copyIntoSqlProp.entrySet()) { + String key = String.valueOf(entry.getKey()); + String value = String.valueOf(entry.getValue()); + String prop = String.format("'%s'='%s'", key, value); + props.add(prop); + } + sb.append(props).append(")"); + return sb.toString(); + } + + // copy into + public void commitTransaction() throws IOException { + long start = System.currentTimeMillis(); + LOGGER.info("commit copy SQL: {}", COPY_INTO_SQL); + int statusCode = -1; + String reasonPhrase = null; + int retry = 0; + Map params = new HashMap<>(); + // params.put("cluster", clusterName); + params.put("sql", COPY_INTO_SQL); + boolean success = false; + CloseableHttpResponse response = null; + String loadResult = ""; + while (retry++ <= maxRetry) { + HttpPostBuilder postBuilder = new HttpPostBuilder(); + postBuilder.setUrl(jdbcUrlStr) + .baseAuth(user, passwd) + .setEntity(new StringEntity(OBJECT_MAPPER.writeValueAsString(params))); + try { + response = httpClient.execute(postBuilder.build()); + } catch (IOException e) { + LOGGER.error("commit error : ", e); + continue; + } + statusCode = response.getStatusLine().getStatusCode(); + reasonPhrase = response.getStatusLine().getReasonPhrase(); + if (statusCode != 200) { + LOGGER.warn("commit failed with status {} {}, reason {}", statusCode, loadUrl, reasonPhrase); + continue; + } else if (response.getEntity() != null) { + loadResult = EntityUtils.toString(response.getEntity()); + success = handleCommitResponse(loadResult); + if (success) { + LOGGER.info("commit success cost {}ms, response is {}", System.currentTimeMillis() - start, + loadResult); + break; + } else { + LOGGER.warn("commit failed, retry again"); + } + } + } + + if (!success) { + LOGGER.error("commit error with status {}, reason {}, response {}", statusCode, reasonPhrase, loadResult); + throw new CopyIntoException("commit error with " + COPY_INTO_SQL); + } + } + + public void handlePreCommitResponse(CloseableHttpResponse response) throws IOException { + try { + final int statusCode = response.getStatusLine().getStatusCode(); + if (statusCode == 200 && response.getEntity() != null) { + String loadResult = EntityUtils.toString(response.getEntity()); + if (loadResult == null || loadResult.isBlank()) { + return; + } + LOGGER.info("response result {}", loadResult); + BaseResponse> baseResponse = new ObjectMapper().readValue(loadResult, + new TypeReference>>() {}); + if (baseResponse.getCode() == 0) { + return; + } else { + throw new RuntimeException("upload file error: " + baseResponse.getMsg()); + } + } + throw new RuntimeException("upload file error: " + response.getStatusLine().toString()); + } finally { + if (response != null) { + response.close(); + } + } + } + + public boolean handleCommitResponse(String loadResult) throws IOException { + BaseResponse baseResponse = 
OBJECT_MAPPER.readValue(loadResult, + new TypeReference>() {}); + if (baseResponse.getCode() == SUCCESS) { + CopyIntoResp dataResp = baseResponse.getData(); + if (FAIL.equals(dataResp.getDataCode())) { + LOGGER.error("copy into execute failed, reason:{}", loadResult); + return false; + } else { + Map result = dataResp.getResult(); + if (!result.get("state").equals("FINISHED") && !ResponseUtils.isCommitted(result.get("msg"))) { + LOGGER.error("copy into load failed, reason:{}", loadResult); + return false; + } else { + return true; + } + } + } else { + LOGGER.error("commit failed, reason:{}", loadResult); + return false; + } + } + + public Path getPath() { + return path; + } + + public void close() throws IOException { + if (null != httpClient) { + try { + httpClient.close(); + } catch (IOException e) { + throw new IOException("Closing httpClient failed.", e); + } + } + } + +} diff --git a/airbyte-integrations/connectors/destination-selectdb/src/main/java/io/airbyte/integrations/destination/selectdb/SelectdbDestination.java b/airbyte-integrations/connectors/destination-selectdb/src/main/java/io/airbyte/integrations/destination/selectdb/SelectdbDestination.java new file mode 100644 index 0000000..9619f3c --- /dev/null +++ b/airbyte-integrations/connectors/destination-selectdb/src/main/java/io/airbyte/integrations/destination/selectdb/SelectdbDestination.java @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.selectdb; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.base.Preconditions; +import io.airbyte.cdk.integrations.BaseConnector; +import io.airbyte.cdk.integrations.base.AirbyteMessageConsumer; +import io.airbyte.cdk.integrations.base.Destination; +import io.airbyte.cdk.integrations.base.IntegrationRunner; +import io.airbyte.cdk.integrations.base.JavaBaseConstants; +import io.airbyte.cdk.integrations.destination.StandardNameTransformer; +import io.airbyte.integrations.destination.selectdb.http.HttpUtil; +import io.airbyte.protocol.models.v0.AirbyteConnectionStatus; +import io.airbyte.protocol.models.v0.AirbyteConnectionStatus.Status; +import io.airbyte.protocol.models.v0.AirbyteMessage; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream; +import io.airbyte.protocol.models.v0.DestinationSyncMode; +import java.io.FileWriter; +import java.io.IOException; +import java.nio.charset.Charset; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.HashMap; +import java.util.Map; +import java.util.function.Consumer; +import org.apache.commons.csv.CSVFormat; +import org.apache.commons.csv.CSVPrinter; +import org.apache.commons.io.FileUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class SelectdbDestination extends BaseConnector implements Destination { + + private static final Logger LOGGER = LoggerFactory.getLogger(SelectdbDestination.class); + + private static final StandardNameTransformer namingResolver = new StandardNameTransformer(); + private static HttpUtil http = new HttpUtil(); + static final String DESTINATION_TEMP_PATH_FIELD = "destination_temp_path"; + private SelectdbOperations selectdbOperations; + + public static void main(String[] args) throws Exception { + new IntegrationRunner(new SelectdbDestination()).run(args); + } + + public SelectdbDestination() { + this.selectdbOperations = new 
SelectdbOperations(); + } + + @Override + public AirbyteConnectionStatus check(JsonNode config) { + try { + Preconditions.checkNotNull(config); + FileUtils.forceMkdir(getTempPathDir(config).toFile()); + selectdbOperations.getConn(config); + } catch (final Exception e) { + return new AirbyteConnectionStatus().withStatus(Status.FAILED).withMessage(e.getMessage()); + } + return new AirbyteConnectionStatus().withStatus(Status.SUCCEEDED); + } + + @Override + public AirbyteMessageConsumer getConsumer(JsonNode config, + ConfiguredAirbyteCatalog configuredCatalog, + Consumer outputRecordCollector) + throws IOException, SQLException { + final Map writeConfigs = new HashMap<>(); + try { + final Path destinationDir = getTempPathDir(config); + FileUtils.forceMkdir(destinationDir.toFile()); + for (ConfiguredAirbyteStream stream : configuredCatalog.getStreams()) { + + final DestinationSyncMode syncMode = stream.getDestinationSyncMode(); + if (syncMode == null) { + throw new IllegalStateException("Undefined destination sync mode"); + } + + final String streamName = stream.getStream().getName(); + final String tableName = namingResolver.getIdentifier(streamName); + final String tmpTableName = namingResolver.getTmpTableName(streamName); + final Path tmpPath = destinationDir.resolve(tmpTableName + ".csv"); + + Statement stmt = selectdbOperations.getConn(config).createStatement(); + stmt.execute(selectdbOperations.createTableQuery(tableName)); + if (syncMode == DestinationSyncMode.OVERWRITE) { + stmt.execute(selectdbOperations.truncateTable(tableName)); + } + CSVFormat csvFormat = CSVFormat.DEFAULT + .withSkipHeaderRecord() + .withDelimiter(SelectdbCopyInto.CSV_COLUMN_SEPARATOR) + .withQuote(null) + .withHeader( + JavaBaseConstants.COLUMN_NAME_AB_ID, + JavaBaseConstants.COLUMN_NAME_EMITTED_AT, + JavaBaseConstants.COLUMN_NAME_DATA); + final FileWriter fileWriter = new FileWriter(tmpPath.toFile(), Charset.defaultCharset(), false); + final CSVPrinter printer = new CSVPrinter(fileWriter, csvFormat); + SelectdbCopyInto sci = new SelectdbCopyInto( + tmpPath, + SelectdbConnectionOptions.getSelectdbConnection(config, tableName), + new LabelInfo("", tableName), + http.getClient(), + JavaBaseConstants.COLUMN_NAME_AB_ID, + JavaBaseConstants.COLUMN_NAME_EMITTED_AT, + JavaBaseConstants.COLUMN_NAME_DATA); + writeConfigs.put(streamName, new SelectdbWriteConfig(sci, printer, csvFormat)); + } + } catch (SQLException | ClassNotFoundException e) { + LOGGER.error("Exception while creating Selectdb destination table: ", e); + throw new SQLException(e); + } catch (IOException e) { + LOGGER.error("Exception while handling temporary csv files : ", e); + throw new IOException(e); + } finally { + selectdbOperations.closeConn(); + } + return new SelectdbConsumer(writeConfigs, configuredCatalog, outputRecordCollector); + } + + protected Path getTempPathDir(final JsonNode config) { + Path path = Paths.get(DESTINATION_TEMP_PATH_FIELD); + Preconditions.checkNotNull(path); + if (!path.startsWith("/code/local")) { + path = Path.of("/local", path.toString()); + } + final Path normalizePath = path.normalize(); + if (!normalizePath.startsWith("/local")) { + throw new IllegalArgumentException("Copy into destination temp file should be inside the /local directory"); + } + return path; + } + +} diff --git a/airbyte-integrations/connectors/destination-selectdb/src/main/java/io/airbyte/integrations/destination/selectdb/SelectdbOperations.java 
b/airbyte-integrations/connectors/destination-selectdb/src/main/java/io/airbyte/integrations/destination/selectdb/SelectdbOperations.java new file mode 100644 index 0000000..05c322a --- /dev/null +++ b/airbyte-integrations/connectors/destination-selectdb/src/main/java/io/airbyte/integrations/destination/selectdb/SelectdbOperations.java @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.selectdb; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.cdk.integrations.base.JavaBaseConstants; +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.SQLException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class SelectdbOperations { + + private static final Logger LOGGER = LoggerFactory.getLogger(SelectdbOperations.class); + + private static final String JDBC_DRIVER = "com.mysql.cj.jdbc.Driver"; + private static final String DB_URL_PATTERN = "jdbc:mysql://%s/%s?rewriteBatchedStatements=true&useUnicode=true&characterEncoding=utf8"; + + // private JsonNode config; + private Connection conn = null; + + public SelectdbOperations() { + // this.config = config; + } + + public Connection getConn(JsonNode config) throws SQLException, ClassNotFoundException { + if (conn == null) { + checkSelectdbAndConnect(config); + } + return conn; + } + + public void closeConn() throws SQLException { + if (conn != null) { + conn.close(); + } + } + + private void checkSelectdbAndConnect(JsonNode config) throws ClassNotFoundException, SQLException { + SelectdbConnectionOptions selectdbConnection = SelectdbConnectionOptions.getSelectdbConnection(config, ""); + String dbUrl = String.format(DB_URL_PATTERN, selectdbConnection.getJdbcUrl(), selectdbConnection.getDb()); + Class.forName(JDBC_DRIVER); + conn = DriverManager.getConnection(dbUrl, selectdbConnection.getUser(), selectdbConnection.getPwd()); + } + + public String truncateTable(String tableName) { + String s = "TRUNCATE TABLE `" + tableName + "`;"; + LOGGER.info("truncate selectdb table SQL : \n " + s); + return s; + } + + protected String createTableQuery(String tableName) { + String s = "CREATE TABLE IF NOT EXISTS `" + tableName + "` ( \n" + + "`" + JavaBaseConstants.COLUMN_NAME_AB_ID + "` varchar(40),\n" + + "`" + JavaBaseConstants.COLUMN_NAME_EMITTED_AT + "` BIGINT,\n" + + "`" + JavaBaseConstants.COLUMN_NAME_DATA + "` String)\n" + + "DUPLICATE KEY(`" + JavaBaseConstants.COLUMN_NAME_AB_ID + "`,`" + + JavaBaseConstants.COLUMN_NAME_EMITTED_AT + "`) \n" + + "DISTRIBUTED BY HASH(`" + JavaBaseConstants.COLUMN_NAME_AB_ID + "`) BUCKETS 16 ;"; + LOGGER.info("create selectdb table SQL : \n " + s); + return s; + } + +} diff --git a/airbyte-integrations/connectors/destination-selectdb/src/main/java/io/airbyte/integrations/destination/selectdb/SelectdbWriteConfig.java b/airbyte-integrations/connectors/destination-selectdb/src/main/java/io/airbyte/integrations/destination/selectdb/SelectdbWriteConfig.java new file mode 100644 index 0000000..8c28cc5 --- /dev/null +++ b/airbyte-integrations/connectors/destination-selectdb/src/main/java/io/airbyte/integrations/destination/selectdb/SelectdbWriteConfig.java @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.selectdb; + +import org.apache.commons.csv.CSVFormat; +import org.apache.commons.csv.CSVPrinter; + +public class SelectdbWriteConfig { + + private final SelectdbCopyInto selectdbCopyInto; + private final CSVPrinter writer; + private final CSVFormat format; + + public SelectdbWriteConfig(SelectdbCopyInto sci, CSVPrinter writer, CSVFormat format) { + this.selectdbCopyInto = sci; + this.writer = writer; + this.format = format; + } + + public SelectdbCopyInto getsci() { + return selectdbCopyInto; + } + + public CSVFormat getFormat() { + return format; + } + + public CSVPrinter getWriter() { + return writer; + } + +} diff --git a/airbyte-integrations/connectors/destination-selectdb/src/main/java/io/airbyte/integrations/destination/selectdb/exception/CopyIntoException.java b/airbyte-integrations/connectors/destination-selectdb/src/main/java/io/airbyte/integrations/destination/selectdb/exception/CopyIntoException.java new file mode 100644 index 0000000..e3e0fb7 --- /dev/null +++ b/airbyte-integrations/connectors/destination-selectdb/src/main/java/io/airbyte/integrations/destination/selectdb/exception/CopyIntoException.java @@ -0,0 +1,13 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.selectdb.exception; + +public class CopyIntoException extends SelectdbRuntimeException { + + public CopyIntoException(String message) { + super(message); + } + +} diff --git a/airbyte-integrations/connectors/destination-selectdb/src/main/java/io/airbyte/integrations/destination/selectdb/exception/SelectdbRuntimeException.java b/airbyte-integrations/connectors/destination-selectdb/src/main/java/io/airbyte/integrations/destination/selectdb/exception/SelectdbRuntimeException.java new file mode 100644 index 0000000..34f14d4 --- /dev/null +++ b/airbyte-integrations/connectors/destination-selectdb/src/main/java/io/airbyte/integrations/destination/selectdb/exception/SelectdbRuntimeException.java @@ -0,0 +1,20 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.selectdb.exception; + +/** + * Selectdb runtime exception. + */ +public class SelectdbRuntimeException extends RuntimeException { + + public SelectdbRuntimeException(String message) { + super(message); + } + + public SelectdbRuntimeException(Throwable cause) { + super(cause); + } + +} diff --git a/airbyte-integrations/connectors/destination-selectdb/src/main/java/io/airbyte/integrations/destination/selectdb/exception/UploadException.java b/airbyte-integrations/connectors/destination-selectdb/src/main/java/io/airbyte/integrations/destination/selectdb/exception/UploadException.java new file mode 100644 index 0000000..593cf67 --- /dev/null +++ b/airbyte-integrations/connectors/destination-selectdb/src/main/java/io/airbyte/integrations/destination/selectdb/exception/UploadException.java @@ -0,0 +1,13 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.selectdb.exception; + +public class UploadException extends SelectdbRuntimeException { + + public UploadException(Exception exception) { + super(exception); + } + +} diff --git a/airbyte-integrations/connectors/destination-selectdb/src/main/java/io/airbyte/integrations/destination/selectdb/http/HttpPostBuilder.java b/airbyte-integrations/connectors/destination-selectdb/src/main/java/io/airbyte/integrations/destination/selectdb/http/HttpPostBuilder.java new file mode 100644 index 0000000..4b281a0 --- /dev/null +++ b/airbyte-integrations/connectors/destination-selectdb/src/main/java/io/airbyte/integrations/destination/selectdb/http/HttpPostBuilder.java @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.selectdb.http; + +import com.google.common.base.Preconditions; +import java.nio.charset.StandardCharsets; +import java.util.HashMap; +import java.util.Map; +import org.apache.commons.codec.binary.Base64; +import org.apache.http.HttpEntity; +import org.apache.http.HttpHeaders; +import org.apache.http.client.methods.HttpPost; + +/** + * Builder for HttpPost. + */ +public class HttpPostBuilder { + + String url; + Map header; + HttpEntity httpEntity; + + public HttpPostBuilder() { + header = new HashMap<>(); + } + + public HttpPostBuilder setUrl(String url) { + this.url = url; + return this; + } + + public HttpPostBuilder baseAuth(String user, String password) { + final String authInfo = user + ":" + password; + byte[] encoded = Base64.encodeBase64(authInfo.getBytes(StandardCharsets.UTF_8)); + header.put(HttpHeaders.AUTHORIZATION, "Basic " + new String(encoded, StandardCharsets.UTF_8)); + return this; + } + + public HttpPostBuilder setEntity(HttpEntity httpEntity) { + this.httpEntity = httpEntity; + return this; + } + + public HttpPost build() { + Preconditions.checkNotNull(url); + Preconditions.checkNotNull(httpEntity); + HttpPost put = new HttpPost(url); + header.forEach(put::setHeader); + put.setEntity(httpEntity); + return put; + } + +} diff --git a/airbyte-integrations/connectors/destination-selectdb/src/main/java/io/airbyte/integrations/destination/selectdb/http/HttpPutBuilder.java b/airbyte-integrations/connectors/destination-selectdb/src/main/java/io/airbyte/integrations/destination/selectdb/http/HttpPutBuilder.java new file mode 100644 index 0000000..585f9e7 --- /dev/null +++ b/airbyte-integrations/connectors/destination-selectdb/src/main/java/io/airbyte/integrations/destination/selectdb/http/HttpPutBuilder.java @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.selectdb.http; + +import com.google.common.base.Preconditions; +import java.nio.charset.StandardCharsets; +import java.util.HashMap; +import java.util.Map; +import org.apache.commons.codec.binary.Base64; +import org.apache.http.HttpEntity; +import org.apache.http.HttpHeaders; +import org.apache.http.client.methods.HttpPut; +import org.apache.http.entity.StringEntity; + +public class HttpPutBuilder { + + String url; + Map header; + HttpEntity httpEntity; + + public HttpPutBuilder() { + header = new HashMap<>(); + } + + public HttpPutBuilder setUrl(String url) { + this.url = url; + return this; + } + + public HttpPutBuilder setFileName(String fileName) { + header.put("fileName", fileName); + return this; + } + + public HttpPutBuilder setEmptyEntity() { + try { + this.httpEntity = new StringEntity(""); + } catch (Exception e) { + throw new IllegalArgumentException(e); + } + return this; + } + + public HttpPutBuilder setCommonHeader() { + header.put(HttpHeaders.EXPECT, "100-continue"); + return this; + } + + public HttpPutBuilder baseAuth(String user, String password) { + final String authInfo = user + ":" + password; + byte[] encoded = Base64.encodeBase64(authInfo.getBytes(StandardCharsets.UTF_8)); + header.put(HttpHeaders.AUTHORIZATION, "Basic " + new String(encoded, StandardCharsets.UTF_8)); + return this; + } + + public HttpPutBuilder setEntity(HttpEntity httpEntity) { + this.httpEntity = httpEntity; + return this; + } + + public HttpPut build() { + Preconditions.checkNotNull(url); + Preconditions.checkNotNull(httpEntity); + HttpPut put = new HttpPut(url); + header.forEach(put::setHeader); + put.setEntity(httpEntity); + return put; + } + +} diff --git a/airbyte-integrations/connectors/destination-selectdb/src/main/java/io/airbyte/integrations/destination/selectdb/http/HttpUtil.java b/airbyte-integrations/connectors/destination-selectdb/src/main/java/io/airbyte/integrations/destination/selectdb/http/HttpUtil.java new file mode 100644 index 0000000..b5f2fc9 --- /dev/null +++ b/airbyte-integrations/connectors/destination-selectdb/src/main/java/io/airbyte/integrations/destination/selectdb/http/HttpUtil.java @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.selectdb.http; + +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClientBuilder; +import org.apache.http.impl.client.HttpClients; + +public class HttpUtil { + + private final HttpClientBuilder httpClientBuilder = + HttpClients + .custom() + .disableRedirectHandling(); + + public CloseableHttpClient getClient() { + return httpClientBuilder.build(); + } + +} diff --git a/airbyte-integrations/connectors/destination-selectdb/src/main/java/io/airbyte/integrations/destination/selectdb/utils/ResponseUtils.java b/airbyte-integrations/connectors/destination-selectdb/src/main/java/io/airbyte/integrations/destination/selectdb/utils/ResponseUtils.java new file mode 100644 index 0000000..65968cd --- /dev/null +++ b/airbyte-integrations/connectors/destination-selectdb/src/main/java/io/airbyte/integrations/destination/selectdb/utils/ResponseUtils.java @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.selectdb.utils; + +import java.util.regex.Pattern; + +public class ResponseUtils { + + public static final Pattern LABEL_EXIST_PATTERN = + Pattern.compile("errCode = 2, detailMessage = Label \\[(.*)\\] " + + "has already been used, relate to txn \\[(\\d+)\\]"); + + public static final Pattern COMMITTED_PATTERN = + Pattern.compile("errCode = 2, detailMessage = No files can be copied, matched (\\d+) files, " + + "filtered (\\d+) files because files may be loading or loaded"); + + public static final String RETRY_COMMIT = "submit task failed, queue size is full: SQL submitter with block policy"; + + private ResponseUtils() {} + + public static boolean isCommitted(String msg) { + return COMMITTED_PATTERN.matcher(msg).matches(); + } + +} diff --git a/airbyte-integrations/connectors/destination-selectdb/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-selectdb/src/main/resources/spec.json new file mode 100644 index 0000000..e06e54a --- /dev/null +++ b/airbyte-integrations/connectors/destination-selectdb/src/main/resources/spec.json @@ -0,0 +1,59 @@ +{ + "documentationUrl": "https://docs.airbyte.com/integrations/destinations/selectdb", + "supportsIncremental": false, + "supportsNormalization": false, + "supportsDBT": false, + "supported_destination_sync_modes": ["append", "overwrite"], + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "SelectDB Destination Spec", + "type": "object", + "required": [ + "load_url", + "jdbc_url", + "cluster_name", + "user_name", + "password", + "database" + ], + "properties": { + "load_url": { + "title": "loadURL", + "description": "load host and port: xxx.privatelink.aliyun.com:47057", + "type": "string", + "order": 0 + }, + "jdbc_url": { + "title": "jdbcURL", + "description": "jdbc host and port: xxx.privatelink.aliyun.com:30523", + "type": "string", + "order": 1 + }, + "cluster_name": { + "title": "ClusterName", + "description": "clusterName of SelectDB", + "type": "string", + "order": 2 + }, + "user_name": { + "title": "UserName", + "description": "Username to use to access the database.", + "type": "string", + "order": 3 + }, + "password": { + "title": "Password", + "description": "Password associated with the username.", + "type": "string", + "airbyte_secret": true, + "order": 4 + }, + "database": { + "title": "DataBase Name", + "description": "Name of the database.", + "type": "string", + "order": 5 + } + } + } +} diff --git a/airbyte-integrations/connectors/destination-selectdb/src/test-integration/java/io/airbyte/integrations/destination/selectdb/SelectdbDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-selectdb/src/test-integration/java/io/airbyte/integrations/destination/selectdb/SelectdbDestinationAcceptanceTest.java new file mode 100644 index 0000000..a3e0974 --- /dev/null +++ b/airbyte-integrations/connectors/destination-selectdb/src/test-integration/java/io/airbyte/integrations/destination/selectdb/SelectdbDestinationAcceptanceTest.java @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.selectdb; + +import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.cdk.integrations.base.JavaBaseConstants; +import io.airbyte.cdk.integrations.destination.StandardNameTransformer; +import io.airbyte.cdk.integrations.standardtest.destination.DestinationAcceptanceTest; +import io.airbyte.commons.io.IOs; +import io.airbyte.commons.json.Jsons; +import java.io.IOException; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.sql.*; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import org.apache.commons.lang3.StringEscapeUtils; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class SelectdbDestinationAcceptanceTest extends DestinationAcceptanceTest { + + private static final Logger LOGGER = LoggerFactory.getLogger(SelectdbDestinationAcceptanceTest.class); + + private JsonNode configJson; + + private static final Path RELATIVE_PATH = Path.of("integration_test/test"); + + private static final String JDBC_DRIVER = "com.mysql.cj.jdbc.Driver"; + private static final String DB_URL_PATTERN = "jdbc:mysql://%s?rewriteBatchedStatements=true&useSSL=true&useUnicode=true&characterEncoding=utf8"; + private static Connection conn = null; + + private static final StandardNameTransformer namingResolver = new StandardNameTransformer(); + + @Override + protected String getImageName() { + return "airbyte/destination-selectdb:dev"; + } + + @BeforeAll + public static void getConnect() { + JsonNode config = Jsons.deserialize(IOs.readFile(Paths.get("secrets/config.json"))); + String dbUrl = String.format(DB_URL_PATTERN, config.get("jdbc_url").asText()); + try { + Class.forName(JDBC_DRIVER); + conn = + DriverManager.getConnection(dbUrl, config.get("user_name").asText(), config.get("password") == null ? "" : config.get("password").asText()); + } catch (Exception e) { + e.printStackTrace(); + } + + } + + @AfterAll + public static void closeConnect() throws SQLException { + if (conn != null) { + conn.close(); + } + } + + @Override + protected JsonNode getConfig() { + // TODO: Generate the configuration JSON file to be used for running the destination during the test + // configJson can either be static and read from secrets/config.json directly + // or created in the setup method + configJson = Jsons.deserialize(IOs.readFile(Paths.get("secrets/config.json"))); + return configJson; + } + + @Override + protected JsonNode getFailCheckConfig() { + // TODO return an invalid config which, when used to run the connector's check connection operation, + // should result in a failed connection check + return null; + } + + @Override + protected List retrieveRecords(TestDestinationEnv testEnv, + String streamName, + String namespace, + JsonNode streamSchema) + throws IOException, SQLException { + // TODO Implement this method to retrieve records which written to the destination by the connector. 
+ // Records returned from this method will be compared against records provided to the connector + // to verify they were written correctly + + final String tableName = namingResolver.getIdentifier(streamName); + + String query = String.format( + "SELECT * FROM %s.%s ORDER BY %s ASC;", configJson.get("database").asText(), tableName, + JavaBaseConstants.COLUMN_NAME_EMITTED_AT); + PreparedStatement stmt = conn.prepareStatement(query); + ResultSet resultSet = stmt.executeQuery(); + + List res = new ArrayList<>(); + while (resultSet.next()) { + String sss = resultSet.getString(JavaBaseConstants.COLUMN_NAME_DATA); + res.add(Jsons.deserialize(StringEscapeUtils.unescapeJava(sss))); + } + stmt.close(); + return res; + } + + @Override + protected void setup(TestDestinationEnv testEnv, HashSet TEST_SCHEMAS) { + // TODO Implement this method to run any setup actions needed before every test case + } + + @Override + protected void tearDown(TestDestinationEnv testEnv) { + // TODO Implement this method to run any cleanup actions needed after every test case + } + + public void testLineBreakCharacters() { + // overrides test with a no-op until we handle full UTF-8 in the destination + } + + public void testSecondSync() throws Exception { + // PubSub cannot overwrite messages, its always append only + } + +} diff --git a/airbyte-integrations/connectors/destination-selectdb/src/test/java/io/airbyte/integrations/destination/selectdb/SelectdbDestinationTest.java b/airbyte-integrations/connectors/destination-selectdb/src/test/java/io/airbyte/integrations/destination/selectdb/SelectdbDestinationTest.java new file mode 100644 index 0000000..fdd2496 --- /dev/null +++ b/airbyte-integrations/connectors/destination-selectdb/src/test/java/io/airbyte/integrations/destination/selectdb/SelectdbDestinationTest.java @@ -0,0 +1,175 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.selectdb; + +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.*; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; +import io.airbyte.cdk.integrations.base.AirbyteMessageConsumer; +import io.airbyte.cdk.integrations.base.Destination; +import io.airbyte.cdk.integrations.destination.StandardNameTransformer; +import io.airbyte.commons.io.IOs; +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.resources.MoreResources; +import io.airbyte.protocol.models.Field; +import io.airbyte.protocol.models.JsonSchemaType; +import io.airbyte.protocol.models.v0.AirbyteConnectionStatus; +import io.airbyte.protocol.models.v0.AirbyteConnectionStatus.Status; +import io.airbyte.protocol.models.v0.AirbyteMessage; +import io.airbyte.protocol.models.v0.AirbyteRecordMessage; +import io.airbyte.protocol.models.v0.AirbyteStateMessage; +import io.airbyte.protocol.models.v0.CatalogHelpers; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import io.airbyte.protocol.models.v0.ConnectorSpecification; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.time.Instant; +import java.util.Collections; +import java.util.Set; +import java.util.stream.Collectors; +import org.apache.commons.io.FileUtils; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +class SelectdbDestinationTest { + + private static final Instant NOW = Instant.now(); + private static final Path TEST_ROOT = Path.of("/tmp/airbyte_tests"); + private static final String USERS_STREAM_NAME = "users"; + private static final String TASKS_STREAM_NAME = "tasks"; + private static final String USERS_FILE = new StandardNameTransformer().getRawTableName(USERS_STREAM_NAME) + ".csv"; + private static final String TASKS_FILE = new StandardNameTransformer().getRawTableName(TASKS_STREAM_NAME) + ".csv"; + + private static final AirbyteMessage MESSAGE_USERS1 = new AirbyteMessage().withType(AirbyteMessage.Type.RECORD) + .withRecord(new AirbyteRecordMessage().withStream(USERS_STREAM_NAME) + .withData(Jsons.jsonNode(ImmutableMap.builder().put("name", "john").put("id", "10").build())) + .withEmittedAt(NOW.toEpochMilli())); + private static final AirbyteMessage MESSAGE_USERS2 = new AirbyteMessage().withType(AirbyteMessage.Type.RECORD) + .withRecord(new AirbyteRecordMessage().withStream(USERS_STREAM_NAME) + .withData(Jsons.jsonNode(ImmutableMap.builder().put("name", "susan").put("id", "30").build())) + .withEmittedAt(NOW.toEpochMilli())); + private static final AirbyteMessage MESSAGE_TASKS1 = new AirbyteMessage().withType(AirbyteMessage.Type.RECORD) + .withRecord(new AirbyteRecordMessage().withStream(TASKS_STREAM_NAME) + .withData(Jsons.jsonNode(ImmutableMap.builder().put("goal", "game").build())) + .withEmittedAt(NOW.toEpochMilli())); + private static final AirbyteMessage MESSAGE_TASKS2 = new AirbyteMessage().withType(AirbyteMessage.Type.RECORD) + .withRecord(new AirbyteRecordMessage().withStream(TASKS_STREAM_NAME) + .withData(Jsons.jsonNode(ImmutableMap.builder().put("goal", "code").build())) + .withEmittedAt(NOW.toEpochMilli())); + private static final AirbyteMessage MESSAGE_STATE = new AirbyteMessage().withType(AirbyteMessage.Type.STATE) + .withState(new AirbyteStateMessage().withData( + 
Jsons.jsonNode(ImmutableMap.builder().put("checkpoint", "now!").build()))); + + private static final ConfiguredAirbyteCatalog CATALOG = new ConfiguredAirbyteCatalog().withStreams( + Lists.newArrayList( + CatalogHelpers.createConfiguredAirbyteStream(USERS_STREAM_NAME, null, + Field.of("name", JsonSchemaType.STRING), + Field.of("id", JsonSchemaType.STRING)), + CatalogHelpers.createConfiguredAirbyteStream(TASKS_STREAM_NAME, null, + Field.of("goal", JsonSchemaType.STRING)))); + + private Path destinationPath; + private JsonNode config; + + @BeforeEach + void setup() throws IOException { + destinationPath = Files.createTempDirectory(Files.createDirectories(TEST_ROOT), "test"); + config = Jsons.deserialize(IOs.readFile(Paths.get("secrets/config.json"))); + } + + private SelectdbDestination getDestination() { + final SelectdbDestination result = spy(SelectdbDestination.class); + doReturn(destinationPath).when(result).getTempPathDir(any()); + return result; + } + + @Test + void testSpec() throws Exception { + final ConnectorSpecification actual = getDestination().spec(); + final String resourceString = MoreResources.readResource("spec.json"); + final ConnectorSpecification expected = Jsons.deserialize(resourceString, ConnectorSpecification.class); + + assertEquals(expected, actual); + } + + @Test + void testCheckSuccess() { + final AirbyteConnectionStatus actual = getDestination().check(config); + final AirbyteConnectionStatus expected = new AirbyteConnectionStatus().withStatus(Status.SUCCEEDED); + assertEquals(expected, actual); + } + + @Test + void testCheckFailure() throws IOException { + final Path looksLikeADirectoryButIsAFile = destinationPath.resolve("file"); + FileUtils.touch(looksLikeADirectoryButIsAFile.toFile()); + final SelectdbDestination destination = spy(SelectdbDestination.class); + doReturn(looksLikeADirectoryButIsAFile).when(destination).getTempPathDir(any()); + + final AirbyteConnectionStatus actual = destination.check(config); + final AirbyteConnectionStatus expected = new AirbyteConnectionStatus().withStatus(Status.FAILED); + + // the message includes the random file path, so just verify it exists and then remove it when we do + // rest of the comparison. + assertNotNull(actual.getMessage()); + actual.setMessage(null); + assertEquals(expected, actual); + } + + @Test + void testCheckInvalidDestinationFolder() { + + final AirbyteConnectionStatus actual = new SelectdbDestination().check(config); + final AirbyteConnectionStatus expected = new AirbyteConnectionStatus().withStatus(Status.FAILED); + // the message includes the random file path, so just verify it exists and then remove it when we do + // rest of the comparison. + assertNotNull(actual.getMessage()); + actual.setMessage(null); + assertEquals(expected, actual); + } + + @Test + void testWriteSuccess() throws Exception { + SelectdbDestination destination = getDestination(); + destination.check(config); + final AirbyteMessageConsumer consumer = destination.getConsumer(config, CATALOG, + Destination::defaultOutputRecordCollector); + consumer.accept(MESSAGE_USERS1); + consumer.accept(MESSAGE_TASKS1); + consumer.accept(MESSAGE_USERS2); + consumer.accept(MESSAGE_TASKS2); + consumer.accept(MESSAGE_STATE); + consumer.close(); + + } + + @SuppressWarnings("ResultOfMethodCallIgnored") + @Test + void testWriteFailure() throws Exception { + // hack to force an exception to be thrown from within the consumer. 
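+    // Spying on the record message and stubbing getRecord() to throw makes the consumer's
+    // accept() call fail mid-sync; close() is then also expected to fail, and the assertions
+    // below verify that no temporary or output files are left behind in the destination path.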
+ final AirbyteMessage spiedMessage = spy(MESSAGE_USERS1); + doThrow(new RuntimeException()).when(spiedMessage).getRecord(); + SelectdbDestination destination = getDestination(); + destination.check(config); + final AirbyteMessageConsumer consumer = spy( + destination.getConsumer(config, CATALOG, Destination::defaultOutputRecordCollector)); + + assertThrows(RuntimeException.class, () -> consumer.accept(spiedMessage)); + consumer.accept(MESSAGE_USERS2); + assertThrows(IOException.class, consumer::close); + + // verify tmp files are cleaned up and no files are output at all + final Set actualFilenames = Files.list(destinationPath).map(Path::getFileName).map(Path::toString) + .collect(Collectors.toSet()); + assertEquals(Collections.emptySet(), actualFilenames); + } + +} diff --git a/airbyte-integrations/connectors/destination-tidb/README.md b/airbyte-integrations/connectors/destination-tidb/README.md new file mode 100644 index 0000000..0672e49 --- /dev/null +++ b/airbyte-integrations/connectors/destination-tidb/README.md @@ -0,0 +1,72 @@ +# Destination TiDB + +This is the repository for the TiDB destination connector in Java. +For information about how to use this connector within Airbyte, see [the User Documentation](https://docs.airbyte.io/integrations/destinations/tidb). + +## Local development + +#### Building via Gradle +From the Airbyte repository root, run: +``` +./gradlew :airbyte-integrations:connectors:destination-tidb:build +``` + +#### Create credentials +**If you are a community contributor**, generate the necessary credentials and place them in `secrets/config.json` conforming to the spec file in `src/main/resources/spec.json`. +Note that the `secrets` directory is git-ignored by default, so there is no danger of accidentally checking in sensitive information. + +**If you are an Airbyte core member**, follow the [instructions](https://docs.airbyte.io/connector-development#using-credentials-in-ci) to set up the credentials. + +### Locally running the connector docker image + +#### Build +Build the connector image via Gradle: + +``` +./gradlew :airbyte-integrations:connectors:destination-tidb:buildConnectorImage +``` +Once built, the docker image name and tag on your host will be `airbyte/destination-tidb:dev`. +the Dockerfile. + +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/destination-tidb:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-tidb:dev check --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-tidb:dev discover --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/destination-tidb:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json +``` + +## Testing +We use `JUnit` for Java tests. + +### Unit and Integration Tests +Place unit tests under `src/test/io/airbyte/integrations/destinations/tidb`. + +#### Acceptance Tests +Airbyte has a standard test suite that all destination connectors must pass. Implement the `TODO`s in +`src/test-integration/java/io/airbyte/integrations/destinations/tidbDestinationAcceptanceTest.java`. + +### Using gradle to run tests +All commands should be run from airbyte project root. 
+To run unit tests: +``` +./gradlew :airbyte-integrations:connectors:destination-tidb:unitTest +``` +To run acceptance and custom integration tests: +``` +./gradlew :airbyte-integrations:connectors:destination-tidb:integrationTest +``` + +## Dependency Management + +### Publishing a new version of the connector +You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what? +1. Make sure your changes are passing our test suite: `airbyte-ci connectors --name=destination-tidb test` +2. Bump the connector version in `metadata.yaml`: increment the `dockerImageTag` value. Please follow [semantic versioning for connectors](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#semantic-versioning-for-connectors). +3. Make sure the `metadata.yaml` content is up to date. +4. Make the connector documentation and its changelog is up to date (`docs/integrations/destinations/tidb.md`). +5. Create a Pull Request: use [our PR naming conventions](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#pull-request-title-convention). +6. Pat yourself on the back for being an awesome contributor. +7. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master. + diff --git a/airbyte-integrations/connectors/destination-tidb/build.gradle b/airbyte-integrations/connectors/destination-tidb/build.gradle new file mode 100644 index 0000000..e0845bc --- /dev/null +++ b/airbyte-integrations/connectors/destination-tidb/build.gradle @@ -0,0 +1,31 @@ +plugins { + id 'application' + id 'airbyte-java-connector' +} + +airbyteJavaConnector { + cdkVersionRequired = '0.2.0' + features = ['db-destinations'] + useLocalCdk = false +} + +//remove once upgrading the CDK version to 0.4.x or later +java { + compileJava { + options.compilerArgs.remove("-Werror") + } +} + +airbyteJavaConnector.addCdkDependencies() + +application { + mainClass = 'io.airbyte.integrations.destination.tidb.TiDBDestination' +} + +dependencies { + + implementation 'mysql:mysql-connector-java:8.0.30' + testImplementation libs.testcontainers.tidb + + integrationTestJavaImplementation libs.testcontainers.tidb +} diff --git a/airbyte-integrations/connectors/destination-tidb/icon.svg b/airbyte-integrations/connectors/destination-tidb/icon.svg new file mode 100644 index 0000000..5aa6726 --- /dev/null +++ b/airbyte-integrations/connectors/destination-tidb/icon.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/airbyte-integrations/connectors/destination-tidb/metadata.yaml b/airbyte-integrations/connectors/destination-tidb/metadata.yaml new file mode 100644 index 0000000..0101bc0 --- /dev/null +++ b/airbyte-integrations/connectors/destination-tidb/metadata.yaml @@ -0,0 +1,29 @@ +data: + connectorSubtype: database + connectorType: destination + definitionId: 06ec60c7-7468-45c0-91ac-174f6e1a788b + dockerImageTag: 0.1.4 + dockerRepository: airbyte/destination-tidb + githubIssueLabel: destination-tidb + icon: tidb.svg + license: MIT + name: TiDB + normalizationConfig: + normalizationIntegrationType: tidb + normalizationRepository: airbyte/normalization-tidb + normalizationTag: 0.4.3 + registries: + cloud: + enabled: false + oss: + enabled: false + releaseStage: alpha + documentationUrl: https://docs.airbyte.com/integrations/destinations/tidb + supportsDbt: true + tags: + - language:java + ab_internal: + sl: 100 + ql: 100 + supportLevel: archived +metadataSpecVersion: "1.0" diff --git 
a/airbyte-integrations/connectors/destination-tidb/src/main/java/io/airbyte/integrations/destination/tidb/TiDBDestination.java b/airbyte-integrations/connectors/destination-tidb/src/main/java/io/airbyte/integrations/destination/tidb/TiDBDestination.java new file mode 100644 index 0000000..a4da8be --- /dev/null +++ b/airbyte-integrations/connectors/destination-tidb/src/main/java/io/airbyte/integrations/destination/tidb/TiDBDestination.java @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.tidb; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import io.airbyte.cdk.db.factory.DataSourceFactory; +import io.airbyte.cdk.db.jdbc.JdbcDatabase; +import io.airbyte.cdk.db.jdbc.JdbcUtils; +import io.airbyte.cdk.integrations.base.Destination; +import io.airbyte.cdk.integrations.base.IntegrationRunner; +import io.airbyte.cdk.integrations.base.ssh.SshWrappedDestination; +import io.airbyte.cdk.integrations.destination.jdbc.AbstractJdbcDestination; +import io.airbyte.commons.json.Jsons; +import io.airbyte.commons.map.MoreMaps; +import io.airbyte.protocol.models.v0.AirbyteConnectionStatus; +import java.util.Map; +import javax.sql.DataSource; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class TiDBDestination extends AbstractJdbcDestination implements Destination { + + private static final Logger LOGGER = LoggerFactory.getLogger(TiDBDestination.class); + public static final String DRIVER_CLASS = "com.mysql.cj.jdbc.Driver"; + + static final Map DEFAULT_JDBC_PARAMETERS = ImmutableMap.of( + "allowLoadLocalInfile", "true"); + + static final Map DEFAULT_SSL_JDBC_PARAMETERS = MoreMaps.merge(ImmutableMap.of( + "useSSL", "true", + "requireSSL", "true", + "verifyServerCertificate", "false"), + DEFAULT_JDBC_PARAMETERS); + + public TiDBDestination() { + super(DRIVER_CLASS, new TiDBSQLNameTransformer(), new TiDBSqlOperations()); + } + + public static Destination sshWrappedDestination() { + return new SshWrappedDestination(new TiDBDestination(), JdbcUtils.HOST_LIST_KEY, JdbcUtils.PORT_LIST_KEY); + } + + @Override + public AirbyteConnectionStatus check(JsonNode config) { + final DataSource dataSource = getDataSource(config); + + try { + final JdbcDatabase database = getDatabase(dataSource); + final String outputSchema = getNamingResolver().getIdentifier(config.get(JdbcUtils.DATABASE_KEY).asText()); + attemptSQLCreateAndDropTableOperations(outputSchema, database, getNamingResolver(), getSqlOperations()); + return new AirbyteConnectionStatus().withStatus(AirbyteConnectionStatus.Status.SUCCEEDED); + } catch (final Exception e) { + LOGGER.error("Exception while checking connection: ", e); + return new AirbyteConnectionStatus() + .withStatus(AirbyteConnectionStatus.Status.FAILED) + .withMessage("Could not connect with provided configuration. 
\n" + e.getMessage()); + } finally { + try { + DataSourceFactory.close(dataSource); + } catch (final Exception e) { + LOGGER.warn("Unable to close data source.", e); + } + } + } + + @Override + protected Map getDefaultConnectionProperties(JsonNode config) { + if (config.has(JdbcUtils.SSL_KEY) && config.get(JdbcUtils.SSL_KEY).asBoolean()) { + return DEFAULT_SSL_JDBC_PARAMETERS; + } else { + return DEFAULT_JDBC_PARAMETERS; + } + } + + @Override + public JsonNode toJdbcConfig(JsonNode config) { + final StringBuilder jdbcUrl = new StringBuilder(String.format("jdbc:mysql://%s:%s/%s", + config.get(JdbcUtils.HOST_KEY).asText(), + config.get(JdbcUtils.PORT_KEY).asInt(), + config.get(JdbcUtils.DATABASE_KEY).asText())); + + final ImmutableMap.Builder configBuilder = ImmutableMap.builder() + .put(JdbcUtils.USERNAME_KEY, config.get(JdbcUtils.USERNAME_KEY).asText()) + .put(JdbcUtils.JDBC_URL_KEY, jdbcUrl); + + if (config.has(JdbcUtils.PASSWORD_KEY)) { + configBuilder.put(JdbcUtils.PASSWORD_KEY, config.get(JdbcUtils.PASSWORD_KEY).asText()); + } + if (config.has(JdbcUtils.JDBC_URL_PARAMS_KEY)) { + configBuilder.put(JdbcUtils.JDBC_URL_PARAMS_KEY, config.get(JdbcUtils.JDBC_URL_PARAMS_KEY)); + } + + return Jsons.jsonNode(configBuilder.build()); + } + + public static void main(String[] args) throws Exception { + final Destination destination = TiDBDestination.sshWrappedDestination(); + LOGGER.info("starting destination: {}", TiDBDestination.class); + new IntegrationRunner(destination).run(args); + LOGGER.info("completed destination: {}", TiDBDestination.class); + } + +} diff --git a/airbyte-integrations/connectors/destination-tidb/src/main/java/io/airbyte/integrations/destination/tidb/TiDBSQLNameTransformer.java b/airbyte-integrations/connectors/destination-tidb/src/main/java/io/airbyte/integrations/destination/tidb/TiDBSQLNameTransformer.java new file mode 100644 index 0000000..5fd9320 --- /dev/null +++ b/airbyte-integrations/connectors/destination-tidb/src/main/java/io/airbyte/integrations/destination/tidb/TiDBSQLNameTransformer.java @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.tidb; + +import io.airbyte.cdk.integrations.destination.StandardNameTransformer; + +/** + * TiDB has some limitations on identifier length. + * https://docs.pingcap.com/tidb/stable/tidb-limitations + *

+ * Identifier type | Maximum length (number of characters allowed)
+ * Database        | 64
+ * Table           | 64
+ * Column          | 64
+ * Index           | 64
+ * View            | 64
+ * Sequence        | 64
+ *
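+ * Note: identifiers produced by this transformer are truncated well below the 64-character limit
+ * (see truncateName and TRUNCATION_MAX_NAME_LENGTH) to leave room for the suffixes that
+ * normalization (dbt) appends to table names.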

+ * TiDBSQLNameTransformer is reference to + * io.airbyte.integrations.destination.mysql.MySQLNameTransformer. + */ +public class TiDBSQLNameTransformer extends StandardNameTransformer { + + @Override + public String applyDefaultCase(final String input) { + return input.toLowerCase(); + } + + // These constants must match those in destination_name_transformer.py + public static final int MAX_TIDB_NAME_LENGTH = 64; + // DBT appends a suffix to table names + public static final int TRUNCATE_DBT_RESERVED_SIZE = 12; + // 4 charachters for 1 underscore and 3 suffix (e.g. _ab1) + // 4 charachters for 1 underscore and 3 schema hash + public static final int TRUNCATE_RESERVED_SIZE = 8; + public static final int TRUNCATION_MAX_NAME_LENGTH = MAX_TIDB_NAME_LENGTH - TRUNCATE_DBT_RESERVED_SIZE - TRUNCATE_RESERVED_SIZE; + + @Override + public String getIdentifier(final String name) { + final String identifier = applyDefaultCase(super.getIdentifier(name)); + return truncateName(identifier, TRUNCATION_MAX_NAME_LENGTH); + } + + @Override + public String getTmpTableName(final String streamName) { + final String tmpTableName = applyDefaultCase(super.getTmpTableName(streamName)); + return truncateName(tmpTableName, TRUNCATION_MAX_NAME_LENGTH); + } + + @Override + public String getRawTableName(final String streamName) { + final String rawTableName = applyDefaultCase(super.getRawTableName(streamName)); + return truncateName(rawTableName, TRUNCATION_MAX_NAME_LENGTH); + } + + static String truncateName(final String name, final int maxLength) { + if (name.length() <= maxLength) { + return name; + } + + final int allowedLength = maxLength - 2; + final String prefix = name.substring(0, allowedLength / 2); + final String suffix = name.substring(name.length() - allowedLength / 2); + return prefix + "__" + suffix; + } + +} diff --git a/airbyte-integrations/connectors/destination-tidb/src/main/java/io/airbyte/integrations/destination/tidb/TiDBSqlOperations.java b/airbyte-integrations/connectors/destination-tidb/src/main/java/io/airbyte/integrations/destination/tidb/TiDBSqlOperations.java new file mode 100644 index 0000000..dc10af5 --- /dev/null +++ b/airbyte-integrations/connectors/destination-tidb/src/main/java/io/airbyte/integrations/destination/tidb/TiDBSqlOperations.java @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.tidb; + +import com.fasterxml.jackson.databind.JsonNode; +import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; +import io.airbyte.cdk.db.jdbc.JdbcDatabase; +import io.airbyte.cdk.integrations.base.JavaBaseConstants; +import io.airbyte.cdk.integrations.destination.StandardNameTransformer; +import io.airbyte.cdk.integrations.destination.jdbc.JdbcSqlOperations; +import io.airbyte.protocol.models.v0.AirbyteRecordMessage; +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.List; + +@SuppressFBWarnings( + value = {"SQL_NONCONSTANT_STRING_PASSED_TO_EXECUTE"}, + justification = "There is little chance of SQL injection. There is also little need for statement reuse. 
The basic statement is more readable than the prepared statement.") +public class TiDBSqlOperations extends JdbcSqlOperations { + + @Override + public void executeTransaction(final JdbcDatabase database, final List queries) throws Exception { + database.executeWithinTransaction(queries); + } + + @Override + public void insertRecordsInternal(final JdbcDatabase database, + final List records, + final String schemaName, + final String tmpTableName) + throws SQLException { + if (records.isEmpty()) { + return; + } + database.execute(connection -> { + File tmpFile = null; + try { + tmpFile = Files.createTempFile(tmpTableName + "-", ".tmp").toFile(); + writeBatchToFile(tmpFile, records); + String filePath = "'" + tmpFile.getAbsolutePath() + "'"; + String query = String.format( + "LOAD DATA LOCAL INFILE %s INTO TABLE %s.%s FIELDS TERMINATED BY ',' ENCLOSED BY '\"' ESCAPED BY '' LINES TERMINATED BY '\\r\\n'", + filePath, schemaName, tmpTableName); + + try (final Statement stmt = connection.createStatement()) { + stmt.execute(query); + } + + } catch (IOException e) { + throw new SQLException(e); + } catch (Exception e) { + throw new RuntimeException(e); + } finally { + try { + if (tmpFile != null) { + Files.delete(tmpFile.toPath()); + } + } catch (final IOException e) { + throw new RuntimeException(e); + } + } + }); + } + + @Override + public boolean isSchemaRequired() { + return false; + } + + @Override + public String createTableQuery(final JdbcDatabase database, final String schemaName, final String tableName) { + return String.format( + "CREATE TABLE IF NOT EXISTS %s.%s ( \n" + + "%s VARCHAR(256) PRIMARY KEY,\n" + + "%s JSON,\n" + + "%s TIMESTAMP(6) DEFAULT CURRENT_TIMESTAMP(6)\n" + + ");\n", + schemaName, tableName, JavaBaseConstants.COLUMN_NAME_AB_ID, JavaBaseConstants.COLUMN_NAME_DATA, JavaBaseConstants.COLUMN_NAME_EMITTED_AT); + } + + @Override + public void createSchemaIfNotExists(final JdbcDatabase database, final String schemaName) throws Exception { + // TiDB use database instead of schema. 
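+    // The incoming "schema" name is therefore created as a TiDB database; IF NOT EXISTS keeps the call idempotent.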
+ database.execute(String.format("CREATE DATABASE IF NOT EXISTS %s;\n", schemaName)); + } + + @Override + protected JsonNode formatData(JsonNode data) { + return StandardNameTransformer.formatJsonPath(data); + } + +} diff --git a/airbyte-integrations/connectors/destination-tidb/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-tidb/src/main/resources/spec.json new file mode 100644 index 0000000..2de2e1b --- /dev/null +++ b/airbyte-integrations/connectors/destination-tidb/src/main/resources/spec.json @@ -0,0 +1,65 @@ +{ + "documentationUrl": "https://docs.airbyte.com/integrations/destinations/tidb", + "supportsIncremental": true, + "supportsNormalization": true, + "supportsDBT": true, + "supported_destination_sync_modes": ["overwrite", "append", "append_dedup"], + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "TiDB Destination Spec", + "type": "object", + "required": ["host", "port", "username", "database"], + "additionalProperties": true, + "properties": { + "host": { + "title": "Host", + "description": "Hostname of the database.", + "type": "string", + "order": 0 + }, + "port": { + "title": "Port", + "description": "Port of the database.", + "type": "integer", + "minimum": 0, + "maximum": 65536, + "default": 4000, + "examples": ["4000"], + "order": 1 + }, + "database": { + "title": "Database", + "description": "Name of the database.", + "type": "string", + "order": 2 + }, + "username": { + "title": "User", + "description": "Username to use to access the database.", + "type": "string", + "order": 3 + }, + "password": { + "title": "Password", + "description": "Password associated with the username.", + "type": "string", + "airbyte_secret": true, + "default": "", + "order": 4 + }, + "ssl": { + "title": "SSL Connection", + "description": "Encrypt data using SSL.", + "type": "boolean", + "default": false, + "order": 5 + }, + "jdbc_url_params": { + "description": "Additional properties to pass to the JDBC URL string when connecting to the database formatted as 'key=value' pairs separated by the symbol '&'. (example: key1=value1&key2=value2&key3=value3).", + "title": "JDBC URL Params", + "type": "string", + "order": 6 + } + } + } +} diff --git a/airbyte-integrations/connectors/destination-tidb/src/test-integration/java/io/airbyte/integrations/destination/tidb/TiDBDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-tidb/src/test-integration/java/io/airbyte/integrations/destination/tidb/TiDBDestinationAcceptanceTest.java new file mode 100644 index 0000000..59af744 --- /dev/null +++ b/airbyte-integrations/connectors/destination-tidb/src/test-integration/java/io/airbyte/integrations/destination/tidb/TiDBDestinationAcceptanceTest.java @@ -0,0 +1,151 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.tidb; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import io.airbyte.cdk.db.Database; +import io.airbyte.cdk.db.factory.DSLContextFactory; +import io.airbyte.cdk.db.factory.DatabaseDriver; +import io.airbyte.cdk.db.jdbc.JdbcUtils; +import io.airbyte.cdk.integrations.base.JavaBaseConstants; +import io.airbyte.cdk.integrations.destination.StandardNameTransformer; +import io.airbyte.cdk.integrations.standardtest.destination.JdbcDestinationAcceptanceTest; +import io.airbyte.cdk.integrations.standardtest.destination.comparator.TestDataComparator; +import io.airbyte.cdk.integrations.util.HostPortResolver; +import io.airbyte.commons.json.Jsons; +import java.sql.SQLException; +import java.util.HashSet; +import java.util.List; +import java.util.stream.Collectors; +import org.jooq.DSLContext; +import org.jooq.SQLDialect; +import org.testcontainers.containers.GenericContainer; +import org.testcontainers.utility.DockerImageName; + +public class TiDBDestinationAcceptanceTest extends JdbcDestinationAcceptanceTest { + + private final StandardNameTransformer namingResolver = new TiDBSQLNameTransformer(); + private GenericContainer container; + private final String usernameKey = "root"; + private final String passwordKey = ""; + private final String databaseKey = "test"; + private final Boolean sslKey = false; + + @Override + protected String getImageName() { + return "airbyte/destination-tidb:dev"; + } + + @Override + protected boolean implementsNamespaces() { + return true; + } + + @Override + protected boolean supportBasicDataTypeTest() { + return true; + } + + @Override + protected boolean supportArrayDataTypeTest() { + return true; + } + + @Override + protected boolean supportObjectDataTypeTest() { + return true; + } + + @Override + protected TestDataComparator getTestDataComparator() { + return new TiDBTestDataComparator(); + } + + @Override + protected JsonNode getConfig() { + return Jsons.jsonNode(ImmutableMap.builder() + .put(JdbcUtils.HOST_KEY, HostPortResolver.resolveHost(container)) + .put(JdbcUtils.USERNAME_KEY, usernameKey) + .put(JdbcUtils.DATABASE_KEY, databaseKey) + .put(JdbcUtils.PORT_KEY, HostPortResolver.resolvePort(container)) + .put(JdbcUtils.SSL_KEY, sslKey) + .build()); + } + + @Override + protected JsonNode getFailCheckConfig() { + return Jsons.jsonNode(ImmutableMap.builder() + .put(JdbcUtils.HOST_KEY, HostPortResolver.resolveHost(container)) + .put(JdbcUtils.USERNAME_KEY, usernameKey) + .put(JdbcUtils.PASSWORD_KEY, "wrong password") + .put(JdbcUtils.DATABASE_KEY, databaseKey) + .put(JdbcUtils.PORT_KEY, HostPortResolver.resolvePort(container)) + .put(JdbcUtils.SSL_KEY, sslKey) + .build()); + } + + @Override + protected String getDefaultSchema(final JsonNode config) { + if (config.get(JdbcUtils.DATABASE_KEY) == null) { + return null; + } + return config.get(JdbcUtils.DATABASE_KEY).asText(); + } + + @Override + protected List retrieveRecords(TestDestinationEnv testEnv, + String streamName, + String namespace, + JsonNode streamSchema) + throws Exception { + return retrieveRecordsFromTable(namingResolver.getRawTableName(streamName), namespace) + .stream() + .map(r -> r.get(JavaBaseConstants.COLUMN_NAME_DATA)) + .collect(Collectors.toList()); + } + + private List retrieveRecordsFromTable(final String tableName, final String schemaName) throws SQLException { + try (final DSLContext dslContext = DSLContextFactory.create( + usernameKey, + passwordKey, + 
DatabaseDriver.MYSQL.getDriverClassName(), + String.format(DatabaseDriver.MYSQL.getUrlFormatString(), + container.getHost(), + container.getFirstMappedPort(), + databaseKey), + SQLDialect.MYSQL)) { + return new Database(dslContext).query( + ctx -> ctx + .fetch(String.format("SELECT * FROM %s.%s ORDER BY %s ASC;", schemaName, tableName, + JavaBaseConstants.COLUMN_NAME_EMITTED_AT)) + .stream() + .map(this::getJsonFromRecord) + .collect(Collectors.toList())); + } + } + + @Override + protected List retrieveNormalizedRecords(final TestDestinationEnv testEnv, final String streamName, final String namespace) + throws Exception { + final String tableName = namingResolver.getIdentifier(streamName); + final String schema = namingResolver.getIdentifier(namespace); + return retrieveRecordsFromTable(tableName, schema); + } + + @Override + protected void setup(TestDestinationEnv testEnv, HashSet TEST_SCHEMAS) { + container = new GenericContainer(DockerImageName.parse("pingcap/tidb:nightly")) + .withExposedPorts(4000); + container.start(); + } + + @Override + protected void tearDown(TestDestinationEnv testEnv) { + container.stop(); + container.close(); + } + +} diff --git a/airbyte-integrations/connectors/destination-tidb/src/test-integration/java/io/airbyte/integrations/destination/tidb/TiDBTestDataComparator.java b/airbyte-integrations/connectors/destination-tidb/src/test-integration/java/io/airbyte/integrations/destination/tidb/TiDBTestDataComparator.java new file mode 100644 index 0000000..cf65de1 --- /dev/null +++ b/airbyte-integrations/connectors/destination-tidb/src/test-integration/java/io/airbyte/integrations/destination/tidb/TiDBTestDataComparator.java @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.tidb; + +import io.airbyte.cdk.integrations.destination.StandardNameTransformer; +import io.airbyte.cdk.integrations.standardtest.destination.comparator.AdvancedTestDataComparator; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.util.ArrayList; +import java.util.List; + +public class TiDBTestDataComparator extends AdvancedTestDataComparator { + + private final StandardNameTransformer namingResolver = new TiDBSQLNameTransformer(); + private final String TIDB_DATATIME_FORMAT = "yyyy-MM-dd HH:mm:ss.S"; + + @Override + protected List resolveIdentifier(final String identifier) { + final List result = new ArrayList<>(); + final String resolved = namingResolver.getIdentifier(identifier); + result.add(identifier); + result.add(resolved); + if (!resolved.startsWith("\"")) { + result.add(resolved.toLowerCase()); + } + return result; + } + + @Override + protected boolean compareDateTimeValues(String expectedValue, String actualValue) { + if (!isDateTimeValue(actualValue)) { + actualValue = LocalDateTime.parse(actualValue, DateTimeFormatter.ofPattern(TIDB_DATATIME_FORMAT)).toString(); + } + return super.compareDateTimeValues(expectedValue, actualValue); + } + + @Override + protected boolean compareBooleanValues(String firstBooleanValue, String secondBooleanValue) { + if (secondBooleanValue.equalsIgnoreCase("true") || secondBooleanValue.equalsIgnoreCase("false")) { + return super.compareBooleanValues(firstBooleanValue, secondBooleanValue); + } else { + return super.compareBooleanValues(firstBooleanValue, String.valueOf(secondBooleanValue.equals("1"))); + } + } + +} diff --git 
a/airbyte-integrations/connectors/destination-tidb/src/test/java/io/airbyte/integrations/destination/tidb/TiDBDestinationTest.java b/airbyte-integrations/connectors/destination-tidb/src/test/java/io/airbyte/integrations/destination/tidb/TiDBDestinationTest.java new file mode 100644 index 0000000..c1fa5e9 --- /dev/null +++ b/airbyte-integrations/connectors/destination-tidb/src/test/java/io/airbyte/integrations/destination/tidb/TiDBDestinationTest.java @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.tidb; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import io.airbyte.cdk.db.factory.DataSourceFactory; +import io.airbyte.cdk.db.factory.DatabaseDriver; +import io.airbyte.cdk.db.jdbc.DefaultJdbcDatabase; +import io.airbyte.cdk.db.jdbc.JdbcDatabase; +import io.airbyte.cdk.db.jdbc.JdbcUtils; +import io.airbyte.cdk.integrations.base.AirbyteMessageConsumer; +import io.airbyte.cdk.integrations.base.Destination; +import io.airbyte.commons.json.Jsons; +import io.airbyte.protocol.models.Field; +import io.airbyte.protocol.models.JsonSchemaType; +import io.airbyte.protocol.models.v0.AirbyteConnectionStatus.Status; +import io.airbyte.protocol.models.v0.AirbyteMessage; +import io.airbyte.protocol.models.v0.AirbyteRecordMessage; +import io.airbyte.protocol.models.v0.AirbyteStateMessage; +import io.airbyte.protocol.models.v0.CatalogHelpers; +import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog; +import java.time.Instant; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.testcontainers.containers.GenericContainer; +import org.testcontainers.utility.DockerImageName; + +public class TiDBDestinationTest { + + private GenericContainer container; + final String SCHEMA_NAME = "public"; + final String STREAM_NAME = "id_and_name"; + + @BeforeEach + public void setup() { + container = new GenericContainer(DockerImageName.parse("pingcap/tidb:nightly")) + .withExposedPorts(4000); + container.start(); + } + + @AfterEach + public void tearDown() { + container.stop(); + container.close(); + } + + private JsonNode getConfig() { + return Jsons.jsonNode(ImmutableMap.of( + JdbcUtils.HOST_KEY, "127.0.0.1", + JdbcUtils.PORT_KEY, container.getFirstMappedPort(), + JdbcUtils.USERNAME_KEY, "root", + JdbcUtils.DATABASE_KEY, "test")); + } + + @Test + public void sanityTest() throws Exception { + final Destination destination = new TiDBDestination(); + final ConfiguredAirbyteCatalog CATALOG = new ConfiguredAirbyteCatalog().withStreams(List.of( + CatalogHelpers.createConfiguredAirbyteStream( + STREAM_NAME, + SCHEMA_NAME, + Field.of("id", JsonSchemaType.NUMBER), + Field.of("name", JsonSchemaType.STRING)))); + JsonNode config = getConfig(); + final AirbyteMessageConsumer consumer = destination.getConsumer(config, CATALOG, Destination::defaultOutputRecordCollector); + final List expectedRecords = getNRecords(10); + consumer.start(); + expectedRecords.forEach(m -> { + try { + consumer.accept(m); + } catch (final Exception e) { + throw new RuntimeException(e); + } + }); + consumer.accept(new AirbyteMessage() + .withType(AirbyteMessage.Type.STATE) + 
.withState(new AirbyteStateMessage().withData(Jsons.jsonNode(ImmutableMap.of(SCHEMA_NAME + "." + STREAM_NAME, 10))))); + consumer.close(); + final JdbcDatabase database = new DefaultJdbcDatabase( + DataSourceFactory.create( + config.get(JdbcUtils.USERNAME_KEY).asText(), + "", + DatabaseDriver.MYSQL.getDriverClassName(), + String.format(DatabaseDriver.MYSQL.getUrlFormatString(), + config.get(JdbcUtils.HOST_KEY).asText(), + config.get(JdbcUtils.PORT_KEY).asInt(), + config.get(JdbcUtils.DATABASE_KEY).asText()))); + final List actualRecords = database.bufferedResultSetQuery( + connection -> connection.createStatement().executeQuery("SELECT * FROM public._airbyte_raw_id_and_name;"), + JdbcUtils.getDefaultSourceOperations()::rowToJson); + final Map expectedRecordsWithId = new HashMap<>(); + expectedRecords.stream().map(AirbyteMessage::getRecord).map(AirbyteRecordMessage::getData) + .forEach(data -> expectedRecordsWithId.put(data.get("id").asInt(), data)); + actualRecords.stream().map(o -> o.get("_airbyte_data").asText()).map(Jsons::deserialize).forEach(actual -> { + assertTrue(expectedRecordsWithId.containsKey(actual.get("id").asInt())); + assertEquals(expectedRecordsWithId.get(actual.get("id").asInt()), actual); + }); + } + + private List getNRecords(final int n) { + return IntStream.range(0, n) + .boxed() + .map(i -> new AirbyteMessage() + .withType(AirbyteMessage.Type.RECORD) + .withRecord(new AirbyteRecordMessage() + .withStream(STREAM_NAME) + .withNamespace(SCHEMA_NAME) + .withEmittedAt(Instant.now().toEpochMilli()) + .withData(Jsons.jsonNode(ImmutableMap.of("id", i, "name", "human " + i))))) + .collect(Collectors.toList()); + } + + @Test + public void testCheckConnection() throws Exception { + Destination destination = new TiDBDestination(); + assertEquals(Status.SUCCEEDED, destination.check(getConfig()).getStatus()); + } + +} diff --git a/airbyte-integrations/connectors/destination-timeplus/.dockerignore b/airbyte-integrations/connectors/destination-timeplus/.dockerignore new file mode 100755 index 0000000..40dea8a --- /dev/null +++ b/airbyte-integrations/connectors/destination-timeplus/.dockerignore @@ -0,0 +1,5 @@ +* +!Dockerfile +!main.py +!destination_timeplus +!setup.py diff --git a/airbyte-integrations/connectors/destination-timeplus/Dockerfile b/airbyte-integrations/connectors/destination-timeplus/Dockerfile new file mode 100755 index 0000000..34f3c74 --- /dev/null +++ b/airbyte-integrations/connectors/destination-timeplus/Dockerfile @@ -0,0 +1,38 @@ +FROM python:3.9.11-alpine3.15 as base + +# build and load all requirements +FROM base as builder +WORKDIR /airbyte/integration_code + +# upgrade pip to the latest version +RUN apk --no-cache upgrade \ + && pip install --upgrade pip \ + && apk --no-cache add tzdata build-base + + +COPY setup.py ./ +# install necessary packages to a temporary folder +RUN pip install --prefix=/install . + +# build a clean environment +FROM base +WORKDIR /airbyte/integration_code + +# copy all loaded and built libraries to a pure basic image +COPY --from=builder /install /usr/local +# add default timezone settings +COPY --from=builder /usr/share/zoneinfo/Etc/UTC /etc/localtime +RUN echo "Etc/UTC" > /etc/timezone + +# bash is installed for more convenient debugging. 
+RUN apk --no-cache add bash + +# copy payload code only +COPY main.py ./ +COPY destination_timeplus ./destination_timeplus + +ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" +ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] + +LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.name=airbyte/destination-timeplus diff --git a/airbyte-integrations/connectors/destination-timeplus/README.md b/airbyte-integrations/connectors/destination-timeplus/README.md new file mode 100755 index 0000000..6ba1451 --- /dev/null +++ b/airbyte-integrations/connectors/destination-timeplus/README.md @@ -0,0 +1,108 @@ +# Timeplus Destination + +This is the repository for the Timeplus destination connector, written in Python. +For information about how to use this connector within Airbyte, see [the documentation](https://docs.airbyte.io/integrations/destinations/timeplus). + +## Local development + +### Prerequisites + +#### Minimum Python version required `= 3.9.0` + +#### Build & Activate Virtual Environment and install dependencies + +From this connector directory, create a virtual environment: + +``` +python -m venv .venv +``` + +This will generate a virtualenv for this module in `.venv/`. Make sure this venv is active in your +development environment of choice. To activate it from the terminal, run: + +``` +source .venv/bin/activate +pip install -r requirements.txt +``` + +If you are in an IDE, follow your IDE's instructions to activate the virtualenv. + +Note that while we are installing dependencies from `requirements.txt`, you should only edit `setup.py` for your dependencies. `requirements.txt` is +used for editable installs (`pip install -e`) to pull in Python dependencies from the monorepo and will call `setup.py`. +If this is mumbo jumbo to you, don't worry about it, just put your deps in `setup.py` but install using `pip install -r requirements.txt` and everything +should work as you expect. + +#### Create credentials + +**If you are a community contributor**, follow the instructions in the [documentation](https://docs.airbyte.io/integrations/destinations/timeplus) +to generate the necessary credentials. Then create a file `secrets/config.json` conforming to the `destination_timeplus/spec.json` file. +Note that the `secrets` directory is gitignored by default, so there is no danger of accidentally checking in sensitive information. +See `integration_tests/sample_config.json` for a sample config file. + +**If you are an Airbyte core member**, copy the credentials in Lastpass under the secret name `destination timeplus test creds` +and place them into `secrets/config.json`. + +### Locally running the connector + +``` +python main.py spec +python main.py check --config secrets/config.json +cat integration_tests/messages.jsonl | python main.py write --config secrets/config.json --catalog integration_tests/configured_catalog.json +``` + +### Locally running the connector docker image + + +#### Build +**Via [`airbyte-ci`](https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/pipelines/README.md) (recommended):** +```bash +airbyte-ci connectors --name=destination-timeplus build +``` + +An image will be built with the tag `airbyte/destination-timeplus:dev`. + +**Via `docker build`:** +```bash +docker build -t airbyte/destination-timeplus:dev . 
+``` + +#### Run + +Then run any of the connector commands as follows: + +``` +docker run --rm airbyte/destination-timeplus:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-timeplus:dev check --config /secrets/config.json +# messages.jsonl is a file containing line-separated JSON representing AirbyteMessages +cat messages.jsonl | docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/destination-timeplus:dev write --config /secrets/config.json --catalog /integration_tests/configured_catalog.json +``` + + +## Testing +You can run our full test suite locally using [`airbyte-ci`](https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/pipelines/README.md): +```bash +airbyte-ci connectors --name=destination-timeplus test +``` + +### Customizing acceptance Tests +Customize `acceptance-test-config.yml` file to configure tests. See [Connector Acceptance Tests](https://docs.airbyte.com/connector-development/testing-connectors/connector-acceptance-tests-reference) for more information. +If your connector requires to create or destroy resources for use during acceptance tests create fixtures for it and place them inside integration_tests/acceptance.py. + +## Dependency Management + +All of your dependencies should go in `setup.py`, NOT `requirements.txt`. The requirements file is only used to connect internal Airbyte dependencies in the monorepo for local development. +We split dependencies between two groups, dependencies that are: + +- required for your connector to work need to go to `MAIN_REQUIREMENTS` list. +- required for the testing need to go to `TEST_REQUIREMENTS` list + +### Publishing a new version of the connector +You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what? +1. Make sure your changes are passing our test suite: `airbyte-ci connectors --name=destination-timeplus test` +2. Bump the connector version in `metadata.yaml`: increment the `dockerImageTag` value. Please follow [semantic versioning for connectors](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#semantic-versioning-for-connectors). +3. Make sure the `metadata.yaml` content is up to date. +4. Make the connector documentation and its changelog is up to date (`docs/integrations/destinations/timeplus.md`). +5. Create a Pull Request: use [our PR naming conventions](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#pull-request-title-convention). +6. Pat yourself on the back for being an awesome contributor. +7. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master. + diff --git a/airbyte-integrations/connectors/destination-timeplus/destination_timeplus/__init__.py b/airbyte-integrations/connectors/destination-timeplus/destination_timeplus/__init__.py new file mode 100755 index 0000000..fa8a30e --- /dev/null +++ b/airbyte-integrations/connectors/destination-timeplus/destination_timeplus/__init__.py @@ -0,0 +1,8 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+# + + +from .destination import DestinationTimeplus + +__all__ = ["DestinationTimeplus"] diff --git a/airbyte-integrations/connectors/destination-timeplus/destination_timeplus/destination.py b/airbyte-integrations/connectors/destination-timeplus/destination_timeplus/destination.py new file mode 100755 index 0000000..3cf5c89 --- /dev/null +++ b/airbyte-integrations/connectors/destination-timeplus/destination_timeplus/destination.py @@ -0,0 +1,160 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +from logging import getLogger +from typing import Any, Iterable, Mapping + +from airbyte_cdk import AirbyteLogger +from airbyte_cdk.destinations import Destination +from airbyte_cdk.models import ( + AirbyteConnectionStatus, + AirbyteMessage, + AirbyteStream, + ConfiguredAirbyteCatalog, + DestinationSyncMode, + Status, + Type, +) +from timeplus import Environment, Stream + +logger = getLogger("airbyte") + + +class DestinationTimeplus(Destination): + def write( + self, config: Mapping[str, Any], configured_catalog: ConfiguredAirbyteCatalog, input_messages: Iterable[AirbyteMessage] + ) -> Iterable[AirbyteMessage]: + """ + Reads the input stream of messages, config, and catalog to write data to the destination. + + This method returns an iterable (typically a generator of AirbyteMessages via yield) containing state messages received + in the input message stream. Outputting a state message means that every AirbyteRecordMessage which came before it has been + successfully persisted to the destination. This is used to ensure fault tolerance in the case that a sync fails before fully completing, + then the source is given the last state message output from this method as the starting point of the next sync. + + :param config: dict of JSON configuration matching the configuration declared in spec.json + :param configured_catalog: The Configured Catalog describing the schema of the data being received and how it should be persisted in the + destination + :param input_messages: The stream of input messages received from the source + :return: Iterable of AirbyteStateMessages wrapped in AirbyteMessage structs + """ + endpoint = config["endpoint"] + apikey = config["apikey"] + if endpoint[-1] == "/": + endpoint = endpoint[0 : len(endpoint) - 1] + env = Environment().address(endpoint).apikey(apikey) + stream_list = Stream(env=env).list() + all_streams = {s.name for s in stream_list} + + # only support "overwrite", "append" + for configured_stream in configured_catalog.streams: + is_overwrite = configured_stream.destination_sync_mode == DestinationSyncMode.overwrite + stream_exists = configured_stream.stream.name in all_streams + logger.info(f"Stream {configured_stream.stream.name} {configured_stream.destination_sync_mode}") + need_delete_stream = False + need_create_stream = False + if is_overwrite: + if stream_exists: + # delete all data in the existing stream and recreate the stream. + need_delete_stream = True + need_create_stream = True + else: + # only need to create the stream + need_create_stream = True + else: + if stream_exists: + # for append mode, just add more data to the existing stream. No need to do anything. + pass + else: + # for append mode, create the stream and append data to it. 
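+                    # the new stream will be created below from the configured stream's JSON schema (see create_stream)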
+ need_create_stream = True + + if need_delete_stream: + # delete the existing stream + Stream(env=env).name(configured_stream.stream.name).get().delete() + logger.info(f"Stream {configured_stream.stream.name} deleted successfully") + if need_create_stream: + # create a new stream + DestinationTimeplus.create_stream(env, configured_stream.stream) + logger.info(f"Stream {configured_stream.stream.name} created successfully") + + for message in input_messages: + if message.type == Type.STATE: + # Emitting a state message indicates that all records which came before it have been written to the destination. So we flush + # the queue to ensure writes happen, then output the state message to indicate it's safe to checkpoint state + yield message + elif message.type == Type.RECORD: + record = message.record + + # this code is to send data to a single-column stream + # Stream(env=env).name(record.stream).column("raw", "string").ingest(payload=record.data) + + Stream(env=env).name(record.stream).ingest(payload=record.data, format="streaming") + else: + # ignore other message types for now + continue + + @staticmethod + def create_stream(env, stream: AirbyteStream): + # singlel-column stream + # Stream(env=env).name(stream.name).column('raw','string').create() + + tp_stream = Stream(env=env).name(stream.name.strip()) + for name, v in stream.json_schema["properties"].items(): + tp_stream.column(name.strip(), DestinationTimeplus.type_mapping(v)) + tp_stream.create() + + @staticmethod + def type_mapping(v) -> str: + airbyte_type = v["type"] + if type(airbyte_type) is list: + for t in list(airbyte_type): + if t != "null": + type_def = {"type": t} + if t == "array": + type_def["items"] = v["items"] + return DestinationTimeplus.type_mapping(type_def) + if airbyte_type == "number": + return "float" + elif airbyte_type == "integer": + return "integer" + elif airbyte_type == "boolean": + return "bool" + elif airbyte_type == "object": + return "string" + elif airbyte_type == "array": + return f"array({DestinationTimeplus.type_mapping(v['items'])})" + else: + return "string" + + def check(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> AirbyteConnectionStatus: + """ + Tests if the input configuration can be used to successfully connect to the destination with the needed permissions + e.g: if a provided API token or password can be used to connect and write to the destination. 
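+        For Timeplus, this verifies that the endpoint starts with http(s) and that the API key is 60 characters long,
+        then attempts to list the streams in the workspace.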
+ + :param logger: Logging object to display debug/info/error to the logs + (logs will not be accessible via airbyte UI if they are not passed to this logger) + :param config: Json object containing the configuration of this destination, content of this json is as specified in + the properties of the spec.json file + + :return: AirbyteConnectionStatus indicating a Success or Failure + """ + try: + endpoint = config["endpoint"] + apikey = config["apikey"] + if not endpoint.startswith("http"): + return AirbyteConnectionStatus(status=Status.FAILED, message="Endpoint must start with http or https") + if len(apikey) != 60: + return AirbyteConnectionStatus(status=Status.FAILED, message="API Key must be 60 characters") + if endpoint[-1] == "/": + endpoint = endpoint[0 : len(endpoint) - 1] + env = Environment().address(endpoint).apikey(apikey) + Stream(env=env).list() + logger.info("Successfully connected to " + endpoint) + return AirbyteConnectionStatus(status=Status.SUCCEEDED) + except Exception as e: + return AirbyteConnectionStatus( + status=Status.FAILED, message=f"Fail to connect to Timeplus endpoint with the given API key: {repr(e)}" + ) diff --git a/airbyte-integrations/connectors/destination-timeplus/destination_timeplus/spec.json b/airbyte-integrations/connectors/destination-timeplus/destination_timeplus/spec.json new file mode 100755 index 0000000..6a56f1b --- /dev/null +++ b/airbyte-integrations/connectors/destination-timeplus/destination_timeplus/spec.json @@ -0,0 +1,31 @@ +{ + "documentationUrl": "https://docs.timeplus.com", + "supported_destination_sync_modes": ["overwrite", "append"], + "supportsIncremental": true, + "supportsDBT": false, + "supportsNormalization": false, + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Destination Timeplus", + "type": "object", + "required": ["endpoint", "apikey"], + "additionalProperties": false, + "properties": { + "endpoint": { + "title": "Endpoint", + "description": "Timeplus workspace endpoint", + "type": "string", + "default": "https://us.timeplus.cloud/", + "examples": ["https://us.timeplus.cloud/workspace_id"], + "order": 0 + }, + "apikey": { + "title": "API key", + "description": "Personal API key", + "type": "string", + "airbyte_secret": true, + "order": 1 + } + } + } +} diff --git a/airbyte-integrations/connectors/destination-timeplus/icon.svg b/airbyte-integrations/connectors/destination-timeplus/icon.svg new file mode 100644 index 0000000..392443d --- /dev/null +++ b/airbyte-integrations/connectors/destination-timeplus/icon.svg @@ -0,0 +1,16 @@ + + + + + + + + + + + + + + + + diff --git a/airbyte-integrations/connectors/destination-timeplus/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/destination-timeplus/integration_tests/configured_catalog.json new file mode 100644 index 0000000..9654051 --- /dev/null +++ b/airbyte-integrations/connectors/destination-timeplus/integration_tests/configured_catalog.json @@ -0,0 +1,263 @@ +{ + "streams": [ + { + "stream": { + "name": "airbyte_single_str_col", + "supported_sync_modes": ["full_refresh"], + "source_defined_cursor": false, + "json_schema": { + "type": "object", + "properties": { + "raw": { + "type": "string" + } + } + } + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "airbyte_acceptance_table", + "supported_sync_modes": ["full_refresh"], + "source_defined_cursor": false, + "json_schema": { + "type": "object", + "properties": { + "column1": { + "type": 
"string" + }, + "column2": { + "type": "number" + }, + "column3": { + "type": "string", + "format": "datetime", + "airbyte_type": "timestamp_without_timezone" + }, + "column4": { + "type": "number" + }, + "column5": { + "type": "array", + "items": { + "type": "integer" + } + } + } + } + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "append" + }, + { + "stream": { + "name": "airbyte_test_boolean", + "supported_sync_modes": ["full_refresh"], + "source_defined_cursor": false, + "json_schema": { + "type": "object", + "properties": { + "column1": { + "type": "boolean" + }, + "column2": { + "type": "number" + } + } + } + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "append" + }, + { + "destination_sync_mode": "overwrite", + "stream": { + "json_schema": { + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "id": { "type": ["null", "integer"] }, + "name": { "type": ["null", "string"] }, + "base_experience": { "type": ["null", "integer"] }, + "height": { "type": ["null", "integer"] }, + "is_default": { "type": ["null", "boolean"] }, + "order": { "type": ["null", "integer"] }, + "weight": { "type": ["null", "integer"] }, + "abilities": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "is_hidden": { "type": ["null", "boolean"] }, + "slot": { "type": ["null", "integer"] }, + "ability": { + "type": ["null", "object"], + "properties": { + "name": { "type": ["null", "string"] }, + "url": { "type": ["null", "string"] } + } + } + } + } + }, + "forms": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "name": { "type": ["null", "string"] }, + "url": { "type": ["null", "string"] } + } + } + }, + "game_indices": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "game_index": { "type": ["null", "integer"] }, + "version": { + "type": ["null", "object"], + "properties": { + "name": { "type": ["null", "string"] }, + "url": { "type": ["null", "string"] } + } + } + } + } + }, + "held_items": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "item": { + "type": ["null", "object"], + "properties": { + "name": { "type": ["null", "string"] }, + "url": { "type": ["null", "string"] } + } + }, + "version_details": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "version": { + "type": ["null", "object"], + "properties": { + "name": { "type": ["null", "string"] }, + "url": { "type": ["null", "string"] } + } + }, + "rarity": { "type": ["null", "integer"] } + } + } + } + } + } + }, + "location_area_encounters": { "type": ["null", "string"] }, + "moves": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "move": { + "type": ["null", "object"], + "properties": { + "name": { "type": ["null", "string"] }, + "url": { "type": ["null", "string"] } + } + }, + "version_group_details": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "move_learn_method": { + "type": ["null", "object"], + "properties": { + "name": { "type": ["null", "string"] }, + "url": { "type": ["null", "string"] } + } + }, + "version_group": { + "type": ["null", "object"], + "properties": { + "name": { "type": ["null", "string"] }, + "url": { "type": ["null", "string"] } + } + }, + "level_learned_at": { "type": ["null", "integer"] } + } + } + } + } + } + }, + "sprites": { + "type": 
["null", "object"], + "properties": { + "front_default": { "type": ["null", "string"] }, + "front_shiny": { "type": ["null", "string"] }, + "front_female": { "type": ["null", "string"] }, + "front_shiny_female": { "type": ["null", "string"] }, + "back_default": { "type": ["null", "string"] }, + "back_shiny": { "type": ["null", "string"] }, + "back_female": { "type": ["null", "string"] }, + "back_shiny_female": { "type": ["null", "string"] } + } + }, + "species": { + "type": ["null", "object"], + "properties": { + "name": { "type": ["null", "string"] }, + "url": { "type": ["null", "string"] } + } + }, + "stats": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "stat": { + "type": ["null", "object"], + "properties": { + "name": { "type": ["null", "string"] }, + "url": { "type": ["null", "string"] } + } + }, + "effort": { "type": ["null", "integer"] }, + "base_stat": { "type": ["null", "integer"] } + } + } + }, + "types": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "slot": { "type": ["null", "integer"] }, + "type": { + "type": ["null", "object"], + "properties": { + "name": { "type": ["null", "string"] }, + "url": { "type": ["null", "string"] } + } + } + } + } + } + } + }, + "name": "pokemon", + "source_defined_cursor": false, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh" + } + ] +} diff --git a/airbyte-integrations/connectors/destination-timeplus/integration_tests/integration_test.py b/airbyte-integrations/connectors/destination-timeplus/integration_tests/integration_test.py new file mode 100755 index 0000000..e3de7da --- /dev/null +++ b/airbyte-integrations/connectors/destination-timeplus/integration_tests/integration_test.py @@ -0,0 +1,74 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+# + +import json +import logging +from datetime import datetime +from typing import Any, Mapping + +import pytest +from airbyte_cdk.models import ( + AirbyteMessage, + AirbyteRecordMessage, + AirbyteStream, + ConfiguredAirbyteCatalog, + ConfiguredAirbyteStream, + DestinationSyncMode, + Status, + SyncMode, + Type, +) +from destination_timeplus import DestinationTimeplus + + +@pytest.fixture(name="config") +def config_fixture() -> Mapping[str, Any]: + with open("secrets/config.json", "r") as f: + return json.loads(f.read()) + + +@pytest.fixture(name="configured_catalog") +def configured_catalog_fixture() -> ConfiguredAirbyteCatalog: + stream_schema = {"type": "object", "properties": {"string_col": {"type": "str"}, "int_col": {"type": "integer"}}} + append_stream = ConfiguredAirbyteStream( + stream=AirbyteStream(name="append_stream", json_schema=stream_schema, supported_sync_modes=[SyncMode.incremental]), + sync_mode=SyncMode.incremental, + destination_sync_mode=DestinationSyncMode.append, + ) + + overwrite_stream = ConfiguredAirbyteStream( + stream=AirbyteStream(name="overwrite_stream", json_schema=stream_schema, supported_sync_modes=[SyncMode.incremental]), + sync_mode=SyncMode.incremental, + destination_sync_mode=DestinationSyncMode.overwrite, + ) + + return ConfiguredAirbyteCatalog(streams=[append_stream, overwrite_stream]) + + +def test_check_valid_config(config: Mapping): + outcome = DestinationTimeplus().check(logging.getLogger("airbyte"), config) + assert outcome.status == Status.SUCCEEDED + + +def test_check_invalid_config(): + outcome = DestinationTimeplus().check(logging.getLogger("airbyte"), {"secret_key": "not_a_real_secret"}) + assert outcome.status == Status.FAILED + + +def test_write(config: Mapping[str, Any], configured_catalog: ConfiguredAirbyteCatalog): + records = [ + AirbyteMessage( + type=Type.RECORD, + record=AirbyteRecordMessage( + stream="append_stream", + data={ + "string_col": "example", + "int_col": 1, + }, + emitted_at=int(datetime.now().timestamp()) * 1000, + ), + ) + ] + dest = DestinationTimeplus() + dest.write(config, configured_catalog, records) diff --git a/airbyte-integrations/connectors/destination-timeplus/integration_tests/messages.jsonl b/airbyte-integrations/connectors/destination-timeplus/integration_tests/messages.jsonl new file mode 100644 index 0000000..6db122f --- /dev/null +++ b/airbyte-integrations/connectors/destination-timeplus/integration_tests/messages.jsonl @@ -0,0 +1,5 @@ +{"type": "RECORD", "record": {"stream": "airbyte_single_str_col", "data": {"raw": "my_value"}, "emitted_at": 1626172757000}} +{"type": "RECORD", "record": {"stream": "airbyte_acceptance_table", "data": {"column1": "my_value", "column2": 221, "column3": "2021-01-01T20:10:22", "column4": 1.214, "column5": [1,2,3]}, "emitted_at": 1626172757000}} +{"type": "RECORD", "record": {"stream": "airbyte_acceptance_table", "data": {"column1": "my_value2", "column2": 222, "column3": "2021-01-02T22:10:22", "column5": [1,2,null]}, "emitted_at": 1626172757000}} +{"type": "RECORD", "record": {"stream": "airbyte_test_boolean", "data": {"column1": true, "column2": 222}, "emitted_at": 1626172757000}} +{"type": "RECORD", "record": {"stream": "pokemon","data": { "abilities": [ { "ability": { "name": "limber", "url": "https://pokeapi.co/api/v2/ability/7/" }, "is_hidden": false, "slot": 1 }, { "ability": { "name": "imposter", "url": "https://pokeapi.co/api/v2/ability/150/" }, "is_hidden": true, "slot": 3 } ], "base_experience": 101, "forms": [ { "name": "ditto", "url": 
"https://pokeapi.co/api/v2/pokemon-form/132/" } ], "game_indices": [ { "game_index": 76, "version": { "name": "red", "url": "https://pokeapi.co/api/v2/version/1/" } }, { "game_index": 76, "version": { "name": "blue", "url": "https://pokeapi.co/api/v2/version/2/" } }, { "game_index": 76, "version": { "name": "yellow", "url": "https://pokeapi.co/api/v2/version/3/" } }, { "game_index": 132, "version": { "name": "gold", "url": "https://pokeapi.co/api/v2/version/4/" } }, { "game_index": 132, "version": { "name": "silver", "url": "https://pokeapi.co/api/v2/version/5/" } }, { "game_index": 132, "version": { "name": "crystal", "url": "https://pokeapi.co/api/v2/version/6/" } }, { "game_index": 132, "version": { "name": "ruby", "url": "https://pokeapi.co/api/v2/version/7/" } }, { "game_index": 132, "version": { "name": "sapphire", "url": "https://pokeapi.co/api/v2/version/8/" } }, { "game_index": 132, "version": { "name": "emerald", "url": "https://pokeapi.co/api/v2/version/9/" } }, { "game_index": 132, "version": { "name": "firered", "url": "https://pokeapi.co/api/v2/version/10/" } }, { "game_index": 132, "version": { "name": "leafgreen", "url": "https://pokeapi.co/api/v2/version/11/" } }, { "game_index": 132, "version": { "name": "diamond", "url": "https://pokeapi.co/api/v2/version/12/" } }, { "game_index": 132, "version": { "name": "pearl", "url": "https://pokeapi.co/api/v2/version/13/" } }, { "game_index": 132, "version": { "name": "platinum", "url": "https://pokeapi.co/api/v2/version/14/" } }, { "game_index": 132, "version": { "name": "heartgold", "url": "https://pokeapi.co/api/v2/version/15/" } }, { "game_index": 132, "version": { "name": "soulsilver", "url": "https://pokeapi.co/api/v2/version/16/" } }, { "game_index": 132, "version": { "name": "black", "url": "https://pokeapi.co/api/v2/version/17/" } }, { "game_index": 132, "version": { "name": "white", "url": "https://pokeapi.co/api/v2/version/18/" } }, { "game_index": 132, "version": { "name": "black-2", "url": "https://pokeapi.co/api/v2/version/21/" } }, { "game_index": 132, "version": { "name": "white-2", "url": "https://pokeapi.co/api/v2/version/22/" } } ], "height": 3, "held_items": [ { "item": { "name": "metal-powder", "url": "https://pokeapi.co/api/v2/item/234/" }, "version_details": [ { "rarity": 5, "version": { "name": "ruby", "url": "https://pokeapi.co/api/v2/version/7/" } }, { "rarity": 5, "version": { "name": "sapphire", "url": "https://pokeapi.co/api/v2/version/8/" } }, { "rarity": 5, "version": { "name": "emerald", "url": "https://pokeapi.co/api/v2/version/9/" } }, { "rarity": 5, "version": { "name": "firered", "url": "https://pokeapi.co/api/v2/version/10/" } }, { "rarity": 5, "version": { "name": "leafgreen", "url": "https://pokeapi.co/api/v2/version/11/" } }, { "rarity": 5, "version": { "name": "diamond", "url": "https://pokeapi.co/api/v2/version/12/" } }, { "rarity": 5, "version": { "name": "pearl", "url": "https://pokeapi.co/api/v2/version/13/" } }, { "rarity": 5, "version": { "name": "platinum", "url": "https://pokeapi.co/api/v2/version/14/" } }, { "rarity": 5, "version": { "name": "heartgold", "url": "https://pokeapi.co/api/v2/version/15/" } }, { "rarity": 5, "version": { "name": "soulsilver", "url": "https://pokeapi.co/api/v2/version/16/" } }, { "rarity": 5, "version": { "name": "black", "url": "https://pokeapi.co/api/v2/version/17/" } }, { "rarity": 5, "version": { "name": "white", "url": "https://pokeapi.co/api/v2/version/18/" } }, { "rarity": 5, "version": { "name": "black-2", "url": 
"https://pokeapi.co/api/v2/version/21/" } }, { "rarity": 5, "version": { "name": "white-2", "url": "https://pokeapi.co/api/v2/version/22/" } }, { "rarity": 5, "version": { "name": "x", "url": "https://pokeapi.co/api/v2/version/23/" } }, { "rarity": 5, "version": { "name": "y", "url": "https://pokeapi.co/api/v2/version/24/" } }, { "rarity": 5, "version": { "name": "omega-ruby", "url": "https://pokeapi.co/api/v2/version/25/" } }, { "rarity": 5, "version": { "name": "alpha-sapphire", "url": "https://pokeapi.co/api/v2/version/26/" } }, { "rarity": 5, "version": { "name": "sun", "url": "https://pokeapi.co/api/v2/version/27/" } }, { "rarity": 5, "version": { "name": "moon", "url": "https://pokeapi.co/api/v2/version/28/" } }, { "rarity": 5, "version": { "name": "ultra-sun", "url": "https://pokeapi.co/api/v2/version/29/" } }, { "rarity": 5, "version": { "name": "ultra-moon", "url": "https://pokeapi.co/api/v2/version/30/" } } ] }, { "item": { "name": "quick-powder", "url": "https://pokeapi.co/api/v2/item/251/" }, "version_details": [ { "rarity": 50, "version": { "name": "diamond", "url": "https://pokeapi.co/api/v2/version/12/" } }, { "rarity": 50, "version": { "name": "pearl", "url": "https://pokeapi.co/api/v2/version/13/" } }, { "rarity": 50, "version": { "name": "platinum", "url": "https://pokeapi.co/api/v2/version/14/" } }, { "rarity": 50, "version": { "name": "heartgold", "url": "https://pokeapi.co/api/v2/version/15/" } }, { "rarity": 50, "version": { "name": "soulsilver", "url": "https://pokeapi.co/api/v2/version/16/" } }, { "rarity": 50, "version": { "name": "black", "url": "https://pokeapi.co/api/v2/version/17/" } }, { "rarity": 50, "version": { "name": "white", "url": "https://pokeapi.co/api/v2/version/18/" } }, { "rarity": 50, "version": { "name": "black-2", "url": "https://pokeapi.co/api/v2/version/21/" } }, { "rarity": 50, "version": { "name": "white-2", "url": "https://pokeapi.co/api/v2/version/22/" } }, { "rarity": 50, "version": { "name": "x", "url": "https://pokeapi.co/api/v2/version/23/" } }, { "rarity": 50, "version": { "name": "y", "url": "https://pokeapi.co/api/v2/version/24/" } }, { "rarity": 50, "version": { "name": "omega-ruby", "url": "https://pokeapi.co/api/v2/version/25/" } }, { "rarity": 50, "version": { "name": "alpha-sapphire", "url": "https://pokeapi.co/api/v2/version/26/" } }, { "rarity": 50, "version": { "name": "sun", "url": "https://pokeapi.co/api/v2/version/27/" } }, { "rarity": 50, "version": { "name": "moon", "url": "https://pokeapi.co/api/v2/version/28/" } }, { "rarity": 50, "version": { "name": "ultra-sun", "url": "https://pokeapi.co/api/v2/version/29/" } }, { "rarity": 50, "version": { "name": "ultra-moon", "url": "https://pokeapi.co/api/v2/version/30/" } } ] } ], "id": 132, "is_default": true, "location_area_encounters": "https://pokeapi.co/api/v2/pokemon/132/encounters", "moves": [ { "move": { "name": "transform", "url": "https://pokeapi.co/api/v2/move/144/" }, "version_group_details": [ { "level_learned_at": 1, "move_learn_method": { "name": "level-up", "url": "https://pokeapi.co/api/v2/move-learn-method/1/" }, "version_group": { "name": "red-blue", "url": "https://pokeapi.co/api/v2/version-group/1/" } }, { "level_learned_at": 1, "move_learn_method": { "name": "level-up", "url": "https://pokeapi.co/api/v2/move-learn-method/1/" }, "version_group": { "name": "yellow", "url": "https://pokeapi.co/api/v2/version-group/2/" } }, { "level_learned_at": 1, "move_learn_method": { "name": "level-up", "url": "https://pokeapi.co/api/v2/move-learn-method/1/" }, 
"version_group": { "name": "gold-silver", "url": "https://pokeapi.co/api/v2/version-group/3/" } }, { "level_learned_at": 1, "move_learn_method": { "name": "level-up", "url": "https://pokeapi.co/api/v2/move-learn-method/1/" }, "version_group": { "name": "crystal", "url": "https://pokeapi.co/api/v2/version-group/4/" } }, { "level_learned_at": 1, "move_learn_method": { "name": "level-up", "url": "https://pokeapi.co/api/v2/move-learn-method/1/" }, "version_group": { "name": "ruby-sapphire", "url": "https://pokeapi.co/api/v2/version-group/5/" } }, { "level_learned_at": 1, "move_learn_method": { "name": "level-up", "url": "https://pokeapi.co/api/v2/move-learn-method/1/" }, "version_group": { "name": "emerald", "url": "https://pokeapi.co/api/v2/version-group/6/" } }, { "level_learned_at": 1, "move_learn_method": { "name": "level-up", "url": "https://pokeapi.co/api/v2/move-learn-method/1/" }, "version_group": { "name": "firered-leafgreen", "url": "https://pokeapi.co/api/v2/version-group/7/" } }, { "level_learned_at": 1, "move_learn_method": { "name": "level-up", "url": "https://pokeapi.co/api/v2/move-learn-method/1/" }, "version_group": { "name": "diamond-pearl", "url": "https://pokeapi.co/api/v2/version-group/8/" } }, { "level_learned_at": 1, "move_learn_method": { "name": "level-up", "url": "https://pokeapi.co/api/v2/move-learn-method/1/" }, "version_group": { "name": "platinum", "url": "https://pokeapi.co/api/v2/version-group/9/" } }, { "level_learned_at": 1, "move_learn_method": { "name": "level-up", "url": "https://pokeapi.co/api/v2/move-learn-method/1/" }, "version_group": { "name": "heartgold-soulsilver", "url": "https://pokeapi.co/api/v2/version-group/10/" } }, { "level_learned_at": 1, "move_learn_method": { "name": "level-up", "url": "https://pokeapi.co/api/v2/move-learn-method/1/" }, "version_group": { "name": "black-white", "url": "https://pokeapi.co/api/v2/version-group/11/" } }, { "level_learned_at": 1, "move_learn_method": { "name": "level-up", "url": "https://pokeapi.co/api/v2/move-learn-method/1/" }, "version_group": { "name": "colosseum", "url": "https://pokeapi.co/api/v2/version-group/12/" } }, { "level_learned_at": 1, "move_learn_method": { "name": "level-up", "url": "https://pokeapi.co/api/v2/move-learn-method/1/" }, "version_group": { "name": "xd", "url": "https://pokeapi.co/api/v2/version-group/13/" } }, { "level_learned_at": 1, "move_learn_method": { "name": "level-up", "url": "https://pokeapi.co/api/v2/move-learn-method/1/" }, "version_group": { "name": "black-2-white-2", "url": "https://pokeapi.co/api/v2/version-group/14/" } }, { "level_learned_at": 1, "move_learn_method": { "name": "level-up", "url": "https://pokeapi.co/api/v2/move-learn-method/1/" }, "version_group": { "name": "x-y", "url": "https://pokeapi.co/api/v2/version-group/15/" } }, { "level_learned_at": 1, "move_learn_method": { "name": "level-up", "url": "https://pokeapi.co/api/v2/move-learn-method/1/" }, "version_group": { "name": "omega-ruby-alpha-sapphire", "url": "https://pokeapi.co/api/v2/version-group/16/" } }, { "level_learned_at": 1, "move_learn_method": { "name": "level-up", "url": "https://pokeapi.co/api/v2/move-learn-method/1/" }, "version_group": { "name": "sun-moon", "url": "https://pokeapi.co/api/v2/version-group/17/" } }, { "level_learned_at": 1, "move_learn_method": { "name": "level-up", "url": "https://pokeapi.co/api/v2/move-learn-method/1/" }, "version_group": { "name": "ultra-sun-ultra-moon", "url": "https://pokeapi.co/api/v2/version-group/18/" } }, { "level_learned_at": 1, 
"move_learn_method": { "name": "level-up", "url": "https://pokeapi.co/api/v2/move-learn-method/1/" }, "version_group": { "name": "lets-go-pikachu-lets-go-eevee", "url": "https://pokeapi.co/api/v2/version-group/19/" } }, { "level_learned_at": 1, "move_learn_method": { "name": "level-up", "url": "https://pokeapi.co/api/v2/move-learn-method/1/" }, "version_group": { "name": "sword-shield", "url": "https://pokeapi.co/api/v2/version-group/20/" } }, { "level_learned_at": 1, "move_learn_method": { "name": "level-up", "url": "https://pokeapi.co/api/v2/move-learn-method/1/" }, "version_group": { "name": "scarlet-violet", "url": "https://pokeapi.co/api/v2/version-group/25/" } } ] } ], "name": "ditto", "order": 214, "species": { "name": "ditto", "url": "https://pokeapi.co/api/v2/pokemon-species/132/" }, "sprites": { "back_default": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/back/132.png", "back_female": null, "back_shiny": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/back/shiny/132.png", "back_shiny_female": null, "front_default": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/132.png", "front_female": null, "front_shiny": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/shiny/132.png", "front_shiny_female": null, "other": { "dream_world": { "front_default": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/other/dream-world/132.svg", "front_female": null }, "home": { "front_default": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/other/home/132.png", "front_female": null, "front_shiny": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/other/home/shiny/132.png", "front_shiny_female": null }, "official-artwork": { "front_default": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/other/official-artwork/132.png", "front_shiny": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/other/official-artwork/shiny/132.png" } }, "versions": { "generation-i": { "red-blue": { "back_default": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-i/red-blue/back/132.png", "back_gray": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-i/red-blue/back/gray/132.png", "back_transparent": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-i/red-blue/transparent/back/132.png", "front_default": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-i/red-blue/132.png", "front_gray": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-i/red-blue/gray/132.png", "front_transparent": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-i/red-blue/transparent/132.png" }, "yellow": { "back_default": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-i/yellow/back/132.png", "back_gray": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-i/yellow/back/gray/132.png", "back_transparent": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-i/yellow/transparent/back/132.png", "front_default": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-i/yellow/132.png", "front_gray": 
"https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-i/yellow/gray/132.png", "front_transparent": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-i/yellow/transparent/132.png" } }, "generation-ii": { "crystal": { "back_default": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-ii/crystal/back/132.png", "back_shiny": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-ii/crystal/back/shiny/132.png", "back_shiny_transparent": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-ii/crystal/transparent/back/shiny/132.png", "back_transparent": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-ii/crystal/transparent/back/132.png", "front_default": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-ii/crystal/132.png", "front_shiny": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-ii/crystal/shiny/132.png", "front_shiny_transparent": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-ii/crystal/transparent/shiny/132.png", "front_transparent": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-ii/crystal/transparent/132.png" }, "gold": { "back_default": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-ii/gold/back/132.png", "back_shiny": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-ii/gold/back/shiny/132.png", "front_default": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-ii/gold/132.png", "front_shiny": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-ii/gold/shiny/132.png", "front_transparent": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-ii/gold/transparent/132.png" }, "silver": { "back_default": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-ii/silver/back/132.png", "back_shiny": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-ii/silver/back/shiny/132.png", "front_default": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-ii/silver/132.png", "front_shiny": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-ii/silver/shiny/132.png", "front_transparent": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-ii/silver/transparent/132.png" } }, "generation-iii": { "emerald": { "front_default": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-iii/emerald/132.png", "front_shiny": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-iii/emerald/shiny/132.png" }, "firered-leafgreen": { "back_default": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-iii/firered-leafgreen/back/132.png", "back_shiny": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-iii/firered-leafgreen/back/shiny/132.png", "front_default": 
"https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-iii/firered-leafgreen/132.png", "front_shiny": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-iii/firered-leafgreen/shiny/132.png" }, "ruby-sapphire": { "back_default": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-iii/ruby-sapphire/back/132.png", "back_shiny": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-iii/ruby-sapphire/back/shiny/132.png", "front_default": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-iii/ruby-sapphire/132.png", "front_shiny": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-iii/ruby-sapphire/shiny/132.png" } }, "generation-iv": { "diamond-pearl": { "back_default": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-iv/diamond-pearl/back/132.png", "back_female": null, "back_shiny": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-iv/diamond-pearl/back/shiny/132.png", "back_shiny_female": null, "front_default": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-iv/diamond-pearl/132.png", "front_female": null, "front_shiny": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-iv/diamond-pearl/shiny/132.png", "front_shiny_female": null }, "heartgold-soulsilver": { "back_default": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-iv/heartgold-soulsilver/back/132.png", "back_female": null, "back_shiny": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-iv/heartgold-soulsilver/back/shiny/132.png", "back_shiny_female": null, "front_default": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-iv/heartgold-soulsilver/132.png", "front_female": null, "front_shiny": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-iv/heartgold-soulsilver/shiny/132.png", "front_shiny_female": null }, "platinum": { "back_default": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-iv/platinum/back/132.png", "back_female": null, "back_shiny": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-iv/platinum/back/shiny/132.png", "back_shiny_female": null, "front_default": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-iv/platinum/132.png", "front_female": null, "front_shiny": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-iv/platinum/shiny/132.png", "front_shiny_female": null } }, "generation-v": { "black-white": { "animated": { "back_default": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-v/black-white/animated/back/132.gif", "back_female": null, "back_shiny": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-v/black-white/animated/back/shiny/132.gif", "back_shiny_female": null, "front_default": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-v/black-white/animated/132.gif", "front_female": null, "front_shiny": 
"https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-v/black-white/animated/shiny/132.gif", "front_shiny_female": null }, "back_default": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-v/black-white/back/132.png", "back_female": null, "back_shiny": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-v/black-white/back/shiny/132.png", "back_shiny_female": null, "front_default": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-v/black-white/132.png", "front_female": null, "front_shiny": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-v/black-white/shiny/132.png", "front_shiny_female": null } }, "generation-vi": { "omegaruby-alphasapphire": { "front_default": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-vi/omegaruby-alphasapphire/132.png", "front_female": null, "front_shiny": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-vi/omegaruby-alphasapphire/shiny/132.png", "front_shiny_female": null }, "x-y": { "front_default": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-vi/x-y/132.png", "front_female": null, "front_shiny": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-vi/x-y/shiny/132.png", "front_shiny_female": null } }, "generation-vii": { "icons": { "front_default": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-vii/icons/132.png", "front_female": null }, "ultra-sun-ultra-moon": { "front_default": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-vii/ultra-sun-ultra-moon/132.png", "front_female": null, "front_shiny": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-vii/ultra-sun-ultra-moon/shiny/132.png", "front_shiny_female": null } }, "generation-viii": { "icons": { "front_default": "https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/versions/generation-viii/icons/132.png", "front_female": null } } } }, "stats": [ { "base_stat": 48, "effort": 1, "stat": { "name": "hp", "url": "https://pokeapi.co/api/v2/stat/1/" } }, { "base_stat": 48, "effort": 0, "stat": { "name": "attack", "url": "https://pokeapi.co/api/v2/stat/2/" } }, { "base_stat": 48, "effort": 0, "stat": { "name": "defense", "url": "https://pokeapi.co/api/v2/stat/3/" } }, { "base_stat": 48, "effort": 0, "stat": { "name": "special-attack", "url": "https://pokeapi.co/api/v2/stat/4/" } }, { "base_stat": 48, "effort": 0, "stat": { "name": "special-defense", "url": "https://pokeapi.co/api/v2/stat/5/" } }, { "base_stat": 48, "effort": 0, "stat": { "name": "speed", "url": "https://pokeapi.co/api/v2/stat/6/" } } ], "types": [ { "slot": 1, "type": { "name": "normal", "url": "https://pokeapi.co/api/v2/type/1/" } } ], "weight": 40 }, "emitted_at": 1673989852906 }} \ No newline at end of file diff --git a/airbyte-integrations/connectors/destination-timeplus/main.py b/airbyte-integrations/connectors/destination-timeplus/main.py new file mode 100755 index 0000000..a6f1b6b --- /dev/null +++ b/airbyte-integrations/connectors/destination-timeplus/main.py @@ -0,0 +1,11 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+# + + +import sys + +from destination_timeplus import DestinationTimeplus + +if __name__ == "__main__": + DestinationTimeplus().run(sys.argv[1:]) diff --git a/airbyte-integrations/connectors/destination-timeplus/metadata.yaml b/airbyte-integrations/connectors/destination-timeplus/metadata.yaml new file mode 100644 index 0000000..9cb94f5 --- /dev/null +++ b/airbyte-integrations/connectors/destination-timeplus/metadata.yaml @@ -0,0 +1,24 @@ +data: + connectorSubtype: database + connectorType: destination + definitionId: f70a8ece-351e-4790-b37b-cb790bcd6d54 + dockerImageTag: 0.1.0 + dockerRepository: airbyte/destination-timeplus + githubIssueLabel: destination-timeplus + icon: timeplus.svg + license: MIT + name: Timeplus + registries: + cloud: + enabled: false + oss: + enabled: false + releaseStage: alpha + documentationUrl: https://docs.airbyte.com/integrations/destinations/timeplus + tags: + - language:python + ab_internal: + sl: 100 + ql: 100 + supportLevel: archived +metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/destination-timeplus/requirements.txt b/airbyte-integrations/connectors/destination-timeplus/requirements.txt new file mode 100755 index 0000000..d6e1198 --- /dev/null +++ b/airbyte-integrations/connectors/destination-timeplus/requirements.txt @@ -0,0 +1 @@ +-e . diff --git a/airbyte-integrations/connectors/destination-timeplus/setup.py b/airbyte-integrations/connectors/destination-timeplus/setup.py new file mode 100755 index 0000000..c082df5 --- /dev/null +++ b/airbyte-integrations/connectors/destination-timeplus/setup.py @@ -0,0 +1,26 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +from setuptools import find_packages, setup + +MAIN_REQUIREMENTS = [ + "airbyte-cdk", + "timeplus~=1.2.1", +] + +TEST_REQUIREMENTS = ["pytest~=6.2"] + +setup( + name="destination_timeplus", + description="Destination implementation for Timeplus.", + author="Airbyte", + author_email="jove@timeplus.io", + packages=find_packages(), + install_requires=MAIN_REQUIREMENTS, + package_data={"": ["*.json"]}, + extras_require={ + "tests": TEST_REQUIREMENTS, + }, +) diff --git a/airbyte-integrations/connectors/destination-timeplus/unit_tests/unit_test.py b/airbyte-integrations/connectors/destination-timeplus/unit_tests/unit_test.py new file mode 100755 index 0000000..0b63590 --- /dev/null +++ b/airbyte-integrations/connectors/destination-timeplus/unit_tests/unit_test.py @@ -0,0 +1,17 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+# + +from destination_timeplus import DestinationTimeplus + + +def test_type_mapping(): + expected = { + "float": {"type": "number"}, + "bool": {"type": "boolean"}, + "string": {"type": "string"}, + "integer": {"type": "integer"}, + "array(integer)": {"type": "array", "items": {"type": "integer"}}, + } + for k, v in expected.items(): + assert k == DestinationTimeplus.type_mapping(v) diff --git a/airbyte-integrations/connectors/destination-vectara/.dockerignore b/airbyte-integrations/connectors/destination-vectara/.dockerignore new file mode 100644 index 0000000..f784000 --- /dev/null +++ b/airbyte-integrations/connectors/destination-vectara/.dockerignore @@ -0,0 +1,5 @@ +* +!Dockerfile +!main.py +!destination_vectara +!setup.py diff --git a/airbyte-integrations/connectors/destination-vectara/Dockerfile b/airbyte-integrations/connectors/destination-vectara/Dockerfile new file mode 100644 index 0000000..9afa4fa --- /dev/null +++ b/airbyte-integrations/connectors/destination-vectara/Dockerfile @@ -0,0 +1,38 @@ +FROM python:3.9.11-alpine3.15 as base + +# build and load all requirements +FROM base as builder +WORKDIR /airbyte/integration_code + +# upgrade pip to the latest version +RUN apk --no-cache upgrade \ + && pip install --upgrade pip \ + && apk --no-cache add tzdata build-base + + +COPY setup.py ./ +# install necessary packages to a temporary folder +RUN pip install --prefix=/install . + +# build a clean environment +FROM base +WORKDIR /airbyte/integration_code + +# copy all loaded and built libraries to a pure basic image +COPY --from=builder /install /usr/local +# add default timezone settings +COPY --from=builder /usr/share/zoneinfo/Etc/UTC /etc/localtime +RUN echo "Etc/UTC" > /etc/timezone + +# bash is installed for more convenient debugging. +RUN apk --no-cache add bash + +# copy payload code only +COPY main.py ./ +COPY destination_vectara ./destination_vectara + +ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" +ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] + +LABEL io.airbyte.version=0.2.0 +LABEL io.airbyte.name=airbyte/destination-vectara diff --git a/airbyte-integrations/connectors/destination-vectara/README.md b/airbyte-integrations/connectors/destination-vectara/README.md new file mode 100644 index 0000000..2c68229 --- /dev/null +++ b/airbyte-integrations/connectors/destination-vectara/README.md @@ -0,0 +1,123 @@ +# Vectara Destination + +This is the repository for the Vectara destination connector, written in Python. +For information about how to use this connector within Airbyte, see [the documentation](https://docs.airbyte.com/integrations/destinations/vectara). + +## Local development + +### Prerequisites +**To iterate on this connector, make sure to complete this prerequisites section.** + +#### Minimum Python version required `= 3.7.0` + +#### Build & Activate Virtual Environment and install dependencies +From this connector directory, create a virtual environment: +``` +python -m venv .venv +``` + +This will generate a virtualenv for this module in `.venv/`. Make sure this venv is active in your +development environment of choice. To activate it from the terminal, run: +``` +source .venv/bin/activate +pip install -r requirements.txt +``` +If you are in an IDE, follow your IDE's instructions to activate the virtualenv. + +Note that while we are installing dependencies from `requirements.txt`, you should only edit `setup.py` for your dependencies. 
`requirements.txt` is +used for editable installs (`pip install -e`) to pull in Python dependencies from the monorepo and will call `setup.py`. +If this is mumbo jumbo to you, don't worry about it, just put your deps in `setup.py` but install using `pip install -r requirements.txt` and everything +should work as you expect. + +#### Building via Gradle +From the Airbyte repository root, run: +``` +./gradlew :airbyte-integrations:connectors:destination-vectara:build +``` + +#### Create credentials +**If you are a community contributor**, follow the instructions in the [documentation](https://docs.airbyte.com/integrations/destinations/vectara) +to generate the necessary credentials. Then create a file `secrets/config.json` conforming to the `destination_vectara/spec.json` file. +Note that the `secrets` directory is gitignored by default, so there is no danger of accidentally checking in sensitive information. +See `integration_tests/sample_config.json` for a sample config file. + +**If you are an Airbyte core member**, copy the credentials in Lastpass under the secret name `destination vectara test creds` +and place them into `secrets/config.json`. + +### Locally running the connector +``` +python main.py spec +python main.py check --config secrets/config.json +python main.py discover --config secrets/config.json +python main.py read --config secrets/config.json --catalog integration_tests/configured_catalog.json +``` + +### Locally running the connector docker image + +#### Build +First, make sure you build the latest Docker image: +``` +docker build . -t airbyte/destination-vectara:dev +``` + +You can also build the connector image via Gradle: +``` +./gradlew :airbyte-integrations:connectors:destination-vectara:airbyteDocker +``` +When building via Gradle, the docker image name and tag, respectively, are the values of the `io.airbyte.name` and `io.airbyte.version` `LABEL`s in +the Dockerfile. + +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/destination-vectara:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-vectara:dev check --config /secrets/config.json +# messages.jsonl is a file containing line-separated JSON representing AirbyteMessages +cat messages.jsonl | docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/destination-vectara:dev write --config /secrets/config.json --catalog /integration_tests/configured_catalog.json +``` +## Testing + Make sure to familiarize yourself with [pytest test discovery](https://docs.pytest.org/en/latest/goodpractices.html#test-discovery) to know how your test files and methods should be named. +First install test dependencies into your virtual environment: +``` +pip install .[tests] +``` +### Unit Tests +To run unit tests locally, from the connector directory run: +``` +python -m pytest unit_tests +``` + +### Integration Tests +There are two types of integration tests: Acceptance Tests (Airbyte's test suite for all destination connectors) and custom integration tests (which are specific to this connector). +#### Custom Integration tests +Place custom tests inside `integration_tests/` folder, then, from the connector root, run +``` +python -m pytest integration_tests +``` +#### Acceptance Tests +Coming soon: + +### Using gradle to run tests +All commands should be run from airbyte project root. 
+To run unit tests: +``` +./gradlew :airbyte-integrations:connectors:destination-vectara:unitTest +``` +To run acceptance and custom integration tests: +``` +./gradlew :airbyte-integrations:connectors:destination-vectara:integrationTest +``` + +## Dependency Management +All of your dependencies should go in `setup.py`, NOT `requirements.txt`. The requirements file is only used to connect internal Airbyte dependencies in the monorepo for local development. +We split dependencies between two groups, dependencies that are: +* required for your connector to work need to go to `MAIN_REQUIREMENTS` list. +* required for the testing need to go to `TEST_REQUIREMENTS` list + +### Publishing a new version of the connector +You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what? +1. Make sure your changes are passing unit and integration tests. +1. Bump the connector version in `Dockerfile` -- just increment the value of the `LABEL io.airbyte.version` appropriately (we use [SemVer](https://semver.org/)). +1. Create a Pull Request. +1. Pat yourself on the back for being an awesome contributor. +1. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master. diff --git a/airbyte-integrations/connectors/destination-vectara/destination_vectara/__init__.py b/airbyte-integrations/connectors/destination-vectara/destination_vectara/__init__.py new file mode 100644 index 0000000..1bc5391 --- /dev/null +++ b/airbyte-integrations/connectors/destination-vectara/destination_vectara/__init__.py @@ -0,0 +1,8 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +from .destination import DestinationVectara + +__all__ = ["DestinationVectara"] diff --git a/airbyte-integrations/connectors/destination-vectara/destination_vectara/client.py b/airbyte-integrations/connectors/destination-vectara/destination_vectara/client.py new file mode 100644 index 0000000..755d300 --- /dev/null +++ b/airbyte-integrations/connectors/destination-vectara/destination_vectara/client.py @@ -0,0 +1,199 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + +import datetime +import json +import traceback +from concurrent.futures import ThreadPoolExecutor +from typing import Any, Mapping + +import backoff +import requests +from destination_vectara.config import VectaraConfig + +METADATA_STREAM_FIELD = "_ab_stream" + + +def user_error(e: Exception) -> bool: + """ + Return True if this exception is caused by user error, False otherwise. + """ + if not isinstance(e, requests.exceptions.RequestException): + return False + return bool(e.response and 400 <= e.response.status_code < 500) + + +class VectaraClient: + + BASE_URL = "https://api.vectara.io/v1" + + def __init__(self, config: VectaraConfig): + if isinstance(config, dict): + config = VectaraConfig.parse_obj(config) + self.customer_id = config.customer_id + self.corpus_name = config.corpus_name + self.client_id = config.oauth2.client_id + self.client_secret = config.oauth2.client_secret + self.parallelize = config.parallelize + self.check() + + def check(self): + """ + Check for an existing corpus in Vectara. + If more than one exists - then return a message + If exactly one exists with this name - ensure that the corpus has the correct metadata fields, and use it. + If not, create it. + """ + try: + jwt_token = self._get_jwt_token() + if not jwt_token: + return "Unable to get JWT Token. Confirm your Client ID and Client Secret." 
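+            # The rest of this check resolves the target corpus: list-corpora is queried
+            # with the configured name, more than one exact match is reported as an error,
+            # a single match is reused, and if none exists a corpus is created with the
+            # _ab_stream filter attribute so documents can later be deleted per stream.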
+ + list_corpora_response = self._request(endpoint="list-corpora", data={"numResults": 100, "filter": self.corpus_name}) + possible_corpora_ids_names_map = { + corpus.get("id"): corpus.get("name") + for corpus in list_corpora_response.get("corpus") + if corpus.get("name") == self.corpus_name + } + if len(possible_corpora_ids_names_map) > 1: + return f"Multiple Corpora exist with name {self.corpus_name}" + if len(possible_corpora_ids_names_map) == 1: + self.corpus_id = list(possible_corpora_ids_names_map.keys())[0] + else: + data = { + "corpus": { + "name": self.corpus_name, + "filterAttributes": [ + { + "name": METADATA_STREAM_FIELD, + "indexed": True, + "type": "FILTER_ATTRIBUTE_TYPE__TEXT", + "level": "FILTER_ATTRIBUTE_LEVEL__DOCUMENT", + }, + ], + } + } + + create_corpus_response = self._request(endpoint="create-corpus", data=data) + self.corpus_id = create_corpus_response.get("corpusId") + + except Exception as e: + return str(e) + "\n" + "".join(traceback.TracebackException.from_exception(e).format()) + + def _get_jwt_token(self): + """Connect to the server and get a JWT token.""" + token_endpoint = f"https://vectara-prod-{self.customer_id}.auth.us-west-2.amazoncognito.com/oauth2/token" + headers = { + "Content-Type": "application/x-www-form-urlencoded", + } + data = {"grant_type": "client_credentials", "client_id": self.client_id, "client_secret": self.client_secret} + + request_time = datetime.datetime.now().timestamp() + response = requests.request(method="POST", url=token_endpoint, headers=headers, data=data) + response_json = response.json() + + self.jwt_token = response_json.get("access_token") + self.jwt_token_expires_ts = request_time + response_json.get("expires_in") + return self.jwt_token + + @backoff.on_exception(backoff.expo, requests.exceptions.RequestException, max_tries=5, giveup=user_error) + def _request(self, endpoint: str, http_method: str = "POST", params: Mapping[str, Any] = None, data: Mapping[str, Any] = None): + + url = f"{self.BASE_URL}/{endpoint}" + + current_ts = datetime.datetime.now().timestamp() + if self.jwt_token_expires_ts - current_ts <= 60: + self._get_jwt_token() + + headers = { + "Content-Type": "application/json", + "Accept": "application/json", + "Authorization": f"Bearer {self.jwt_token}", + "customer-id": self.customer_id, + "X-source": "airbyte", + } + + response = requests.request(method=http_method, url=url, headers=headers, params=params, data=json.dumps(data)) + response.raise_for_status() + return response.json() + + def delete_doc_by_metadata(self, metadata_field_name, metadata_field_values): + document_ids = [] + for value in metadata_field_values: + data = { + "query": [ + { + "query": "", + "numResults": 100, + "corpusKey": [ + { + "customerId": self.customer_id, + "corpusId": self.corpus_id, + "metadataFilter": f"doc.{metadata_field_name} = '{value}'", + } + ], + } + ] + } + query_documents_response = self._request(endpoint="query", data=data) + document_ids.extend([document.get("id") for document in query_documents_response.get("responseSet")[0].get("document")]) + self.delete_docs_by_id(document_ids=document_ids) + + def delete_docs_by_id(self, document_ids): + for document_id in document_ids: + self._request( + endpoint="delete-doc", data={"customerId": self.customer_id, "corpusId": self.corpus_id, "documentId": document_id} + ) + + def index_document(self, document): + document_section, document_metadata, document_title, document_id = document + if len(document_section) == 0: + return None # Document is empty, so skip it + 
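+        # Build the index request: metadata values that are not plain strings, numbers,
+        # or booleans are JSON-encoded by _normalize, the metadata dict is serialized
+        # into metadataJson, and each section field becomes a "key: value" text entry,
+        # skipping the internal _ab_stream marker.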
document_metadata = self._normalize(document_metadata) + data = { + "customerId": self.customer_id, + "corpusId": self.corpus_id, + "document": { + "documentId": document_id, + "metadataJson": json.dumps(document_metadata), + "title": document_title, + "section": [ + {"text": f"{section_key}: {section_value}"} + for section_key, section_value in document_section.items() + if section_key != METADATA_STREAM_FIELD + ], + }, + } + index_document_response = self._request(endpoint="index", data=data) + return index_document_response + + def index_documents(self, documents): + if self.parallelize: + with ThreadPoolExecutor() as executor: + futures = [executor.submit(self.index_document, doc) for doc in documents] + for future in futures: + try: + response = future.result() + if response is None: + continue + assert ( + response.get("status").get("code") == "OK" + or response.get("status").get("statusDetail") == "Document should have at least one part." + ) + except AssertionError as e: + # Handle the assertion error + pass + else: + for doc in documents: + self.index_document(doc) + + def _normalize(self, metadata: dict) -> dict: + result = {} + for key, value in metadata.items(): + if isinstance(value, (str, int, float, bool)): + result[key] = value + else: + # JSON encode all other types + result[key] = json.dumps(value) + return result diff --git a/airbyte-integrations/connectors/destination-vectara/destination_vectara/config.py b/airbyte-integrations/connectors/destination-vectara/destination_vectara/config.py new file mode 100644 index 0000000..86ca2db --- /dev/null +++ b/airbyte-integrations/connectors/destination-vectara/destination_vectara/config.py @@ -0,0 +1,75 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + +from typing import List, Optional + +from airbyte_cdk.utils.spec_schema_transformations import resolve_refs +from pydantic import BaseModel, Field + + +class OAuth2(BaseModel): + client_id: str = Field(..., title="OAuth Client ID", description="OAuth2.0 client id", order=0) + client_secret: str = Field(..., title="OAuth Client Secret", description="OAuth2.0 client secret", airbyte_secret=True, order=1) + + class Config: + title = "OAuth2.0 Credentials" + schema_extra = { + "description": "OAuth2.0 credentials used to authenticate admin actions (creating/deleting corpora)", + "group": "auth", + } + + +class VectaraConfig(BaseModel): + oauth2: OAuth2 + customer_id: str = Field( + ..., title="Customer ID", description="Your customer id as it is in the authenticaion url", order=2, group="account" + ) + corpus_name: str = Field(..., title="Corpus Name", description="The Name of Corpus to load data into", order=3, group="account") + + parallelize: Optional[bool] = Field( + default=False, + title="Parallelize", + description="Parallelize indexing into Vectara with multiple threads", + always_show=True, + group="account", + ) + + text_fields: Optional[List[str]] = Field( + default=[], + title="Text fields to index with Vectara", + description="List of fields in the record that should be in the section of the document. The field list is applied to all streams in the same way and non-existing fields are ignored. If none are defined, all fields are considered text fields. When specifying text fields, you can access nested fields in the record by using dot notation, e.g. `user.name` will access the `name` field in the `user` object. It's also possible to use wildcards to access all fields in an object, e.g. 
`users.*.name` will access all `names` fields in all entries of the `users` array.", + always_show=True, + examples=["text", "user.name", "users.*.name"], + ) + title_field: Optional[str] = Field( + default="", + title="Text field to use as document title with Vectara", + description="A field that will be used to populate the `title` of each document. The field list is applied to all streams in the same way and non-existing fields are ignored. If none are defined, all fields are considered text fields. When specifying text fields, you can access nested fields in the record by using dot notation, e.g. `user.name` will access the `name` field in the `user` object. It's also possible to use wildcards to access all fields in an object, e.g. `users.*.name` will access all `names` fields in all entries of the `users` array.", + always_show=True, + examples=["document_key"], + ) + metadata_fields: Optional[List[str]] = Field( + default=[], + title="Fields to store as metadata", + description="List of fields in the record that should be stored as metadata. The field list is applied to all streams in the same way and non-existing fields are ignored. If none are defined, all fields are considered metadata fields. When specifying text fields, you can access nested fields in the record by using dot notation, e.g. `user.name` will access the `name` field in the `user` object. It's also possible to use wildcards to access all fields in an object, e.g. `users.*.name` will access all `names` fields in all entries of the `users` array. When specifying nested paths, all matching values are flattened into an array set to a field named by the path.", + always_show=True, + examples=["age", "user"], + ) + + class Config: + title = "Vectara Config" + schema_extra = { + "description": "Configuration to connect to the Vectara instance", + "groups": [ + {"id": "account", "title": "Account"}, + {"id": "auth", "title": "Authentication"}, + ], + } + + @classmethod + def schema(cls): + """we're overriding the schema classmethod to enable some post-processing""" + schema = super().schema() + schema = resolve_refs(schema) + return schema diff --git a/airbyte-integrations/connectors/destination-vectara/destination_vectara/destination.py b/airbyte-integrations/connectors/destination-vectara/destination_vectara/destination.py new file mode 100644 index 0000000..6a58065 --- /dev/null +++ b/airbyte-integrations/connectors/destination-vectara/destination_vectara/destination.py @@ -0,0 +1,95 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + +from typing import Any, Iterable, Mapping + +from airbyte_cdk import AirbyteLogger +from airbyte_cdk.destinations import Destination +from airbyte_cdk.models import ( + AirbyteConnectionStatus, + AirbyteMessage, + ConfiguredAirbyteCatalog, + ConnectorSpecification, + DestinationSyncMode, + Status, + Type, +) +from destination_vectara.client import VectaraClient +from destination_vectara.config import VectaraConfig +from destination_vectara.writer import VectaraWriter + + +class DestinationVectara(Destination): + def write( + self, config: Mapping[str, Any], configured_catalog: ConfiguredAirbyteCatalog, input_messages: Iterable[AirbyteMessage] + ) -> Iterable[AirbyteMessage]: + + """ + Reads the input stream of messages, config, and catalog to write data to the destination. + + This method returns an iterable (typically a generator of AirbyteMessages via yield) containing state messages received + in the input message stream. 
Outputting a state message means that every AirbyteRecordMessage which came before it has been + successfully persisted to the destination. This is used to ensure fault tolerance in the case that a sync fails before fully completing, + then the source is given the last state message output from this method as the starting point of the next sync. + + :param config: dict of JSON configuration matching the configuration declared in spec.json + :param configured_catalog: The Configured Catalog describing the schema of the data being received and how it should be persisted in the + destination + :param input_messages: The stream of input messages received from the source + :return: Iterable of AirbyteStateMessages wrapped in AirbyteMessage structs + """ + + config_model = VectaraConfig.parse_obj(config) + writer = VectaraWriter( + client=VectaraClient(config_model), + text_fields=config_model.text_fields, + title_field=config_model.title_field, + metadata_fields=config_model.metadata_fields, + catalog=configured_catalog, + ) + + writer.delete_streams_to_overwrite(catalog=configured_catalog) + + for message in input_messages: + if message.type == Type.STATE: + # Emitting a state message indicates that all records which came before it have been written to the destination. So we flush + # the queue to ensure writes happen, then output the state message to indicate it's safe to checkpoint state + writer.flush() + yield message + elif message.type == Type.RECORD: + record = message.record + writer.queue_write_operation(record) + else: + # ignore other message types for now + continue + + # Make sure to flush any records still in the queue + writer.flush() + + def check(self, logger: AirbyteLogger, config: VectaraConfig) -> AirbyteConnectionStatus: + """ + Tests if the input configuration can be used to successfully connect to the destination with the needed permissions + e.g: if a provided API token or password can be used to connect and write to the destination. + + :param logger: Logging object to display debug/info/error to the logs + (logs will not be accessible via airbyte UI if they are not passed to this logger) + :param config: Json object containing the configuration of this destination, content of this json is as specified in + the properties of the spec.json file + + :return: AirbyteConnectionStatus indicating a Success or Failure + """ + client = VectaraClient(config=config) + client_error = client.check() + if client_error: + return AirbyteConnectionStatus(status=Status.FAILED, message="\n".join([client_error])) + else: + return AirbyteConnectionStatus(status=Status.SUCCEEDED) + + def spec(self, *args: Any, **kwargs: Any) -> ConnectorSpecification: + return ConnectorSpecification( + documentationUrl="https://docs.airbyte.com/integrations/destinations/vectara", + supportsIncremental=True, + supported_destination_sync_modes=[DestinationSyncMode.overwrite, DestinationSyncMode.append], + connectionSpecification=VectaraConfig.schema(), + ) diff --git a/airbyte-integrations/connectors/destination-vectara/destination_vectara/writer.py b/airbyte-integrations/connectors/destination-vectara/destination_vectara/writer.py new file mode 100644 index 0000000..0794b0d --- /dev/null +++ b/airbyte-integrations/connectors/destination-vectara/destination_vectara/writer.py @@ -0,0 +1,128 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+# + +import uuid +from typing import Any, Dict, List, Mapping, Optional + +import dpath.util +from airbyte_cdk.models import AirbyteRecordMessage, ConfiguredAirbyteCatalog, ConfiguredAirbyteStream, DestinationSyncMode +from airbyte_cdk.models.airbyte_protocol import DestinationSyncMode +from airbyte_cdk.utils.traced_exception import AirbyteTracedException, FailureType +from destination_vectara.client import VectaraClient + +METADATA_STREAM_FIELD = "_ab_stream" + + +class VectaraWriter: + + write_buffer: List[Mapping[str, Any]] = [] + flush_interval = 1000 + + def __init__( + self, + client: VectaraClient, + text_fields: Optional[List[str]], + title_field: Optional[str], + metadata_fields: Optional[List[str]], + catalog: ConfiguredAirbyteCatalog, + ): + self.client = client + self.text_fields = text_fields + self.title_field = title_field + self.metadata_fields = metadata_fields + self.streams = {f"{stream.stream.namespace}_{stream.stream.name}": stream for stream in catalog.streams} + self.ids_to_delete: List[str] = [] + + def delete_streams_to_overwrite(self, catalog: ConfiguredAirbyteCatalog) -> None: + streams_to_overwrite = [ + f"{stream.stream.namespace}_{stream.stream.name}" + for stream in catalog.streams + if stream.destination_sync_mode == DestinationSyncMode.overwrite + ] + if len(streams_to_overwrite): + self.client.delete_doc_by_metadata(metadata_field_name=METADATA_STREAM_FIELD, metadata_field_values=streams_to_overwrite) + + def _delete_documents_to_dedupe(self): + if len(self.ids_to_delete) > 0: + self.client.delete_docs_by_id(document_ids=self.ids_to_delete) + + def queue_write_operation(self, record: AirbyteRecordMessage) -> None: + """Adds messages to the write queue and flushes if the buffer is full""" + + stream_identifier = self._get_stream_id(record=record) + document_section = self._get_document_section(record=record) + document_metadata = self._get_document_metadata(record=record) + document_title = self._get_document_title(record=record) + primary_key = self._get_record_primary_key(record=record) + + if primary_key: + document_id = f"Stream_{stream_identifier}_Key_{primary_key}" + if self.streams[stream_identifier].destination_sync_mode == DestinationSyncMode.append_dedup: + self.ids_to_delete.append(document_id) + else: + document_id = str(uuid.uuid4().int) + + self.write_buffer.append((document_section, document_metadata, document_title, document_id)) + if len(self.write_buffer) == self.flush_interval: + self.flush() + + def flush(self) -> None: + """Flush all documents in Queue to Vectara""" + self._delete_documents_to_dedupe() + self.client.index_documents(self.write_buffer) + self.write_buffer.clear() + self.ids_to_delete.clear() + + def _get_document_section(self, record: AirbyteRecordMessage): + relevant_fields = self._extract_relevant_fields(record, self.text_fields) + if len(relevant_fields) == 0: + text_fields = ", ".join(self.text_fields) if self.text_fields else "all fields" + raise AirbyteTracedException( + internal_message="No text fields found in record", + message=f"Record {str(record.data)[:250]}... does not contain any of the configured text fields: {text_fields}. 
Please check your processing configuration, there has to be at least one text field set in each record.", + failure_type=FailureType.config_error, + ) + document_section = relevant_fields + return document_section + + def _extract_relevant_fields(self, record: AirbyteRecordMessage, fields: Optional[List[str]]) -> Dict[str, Any]: + relevant_fields = {} + if fields and len(fields) > 0: + for field in fields: + values = dpath.util.values(record.data, field, separator=".") + if values and len(values) > 0: + relevant_fields[field] = values if len(values) > 1 else values[0] + else: + relevant_fields = record.data + return relevant_fields + + def _get_document_metadata(self, record: AirbyteRecordMessage) -> Dict[str, Any]: + document_metadata = self._extract_relevant_fields(record, self.metadata_fields) + document_metadata[METADATA_STREAM_FIELD] = self._get_stream_id(record) + return document_metadata + + def _get_document_title(self, record: AirbyteRecordMessage) -> str: + title = "Untitled" + if self.title_field: + title = dpath.util.get(record.data, self.title_field) + return title + + def _get_stream_id(self, record: AirbyteRecordMessage) -> str: + return f"{record.namespace}_{record.stream}" + + def _get_record_primary_key(self, record: AirbyteRecordMessage) -> Optional[str]: + stream_identifier = self._get_stream_id(record) + current_stream: ConfiguredAirbyteStream = self.streams[stream_identifier] + + if not current_stream.primary_key: + return None + + primary_key = [] + for key in current_stream.primary_key: + try: + primary_key.append(str(dpath.util.get(record.data, key))) + except KeyError: + primary_key.append("__not_found__") + stringified_primary_key = "_".join(primary_key) + return f"{stream_identifier}_{stringified_primary_key}" diff --git a/airbyte-integrations/connectors/destination-vectara/icon.svg b/airbyte-integrations/connectors/destination-vectara/icon.svg new file mode 100644 index 0000000..70798dc --- /dev/null +++ b/airbyte-integrations/connectors/destination-vectara/icon.svg @@ -0,0 +1,40 @@ + + + + + + diff --git a/airbyte-integrations/connectors/destination-vectara/integration_tests/integration_test.py b/airbyte-integrations/connectors/destination-vectara/integration_tests/integration_test.py new file mode 100644 index 0000000..0520063 --- /dev/null +++ b/airbyte-integrations/connectors/destination-vectara/integration_tests/integration_test.py @@ -0,0 +1,127 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+# + +import json +import logging +import unittest +from typing import Any, Dict + +from airbyte_cdk.models import ( + AirbyteMessage, + AirbyteRecordMessage, + AirbyteStateMessage, + AirbyteStream, + ConfiguredAirbyteCatalog, + ConfiguredAirbyteStream, + DestinationSyncMode, + Status, + SyncMode, + Type, +) +from destination_vectara.client import VectaraClient +from destination_vectara.destination import DestinationVectara + + +class VectaraIntegrationTest(unittest.TestCase): + def _get_configured_catalog(self, destination_mode: DestinationSyncMode) -> ConfiguredAirbyteCatalog: + stream_schema = {"type": "object", "properties": {"str_col": {"type": "str"}, "int_col": {"type": "integer"}}} + + overwrite_stream = ConfiguredAirbyteStream( + stream=AirbyteStream( + name="mystream", json_schema=stream_schema, supported_sync_modes=[SyncMode.incremental, SyncMode.full_refresh] + ), + primary_key=[["int_col"]], + sync_mode=SyncMode.incremental, + destination_sync_mode=destination_mode, + ) + + return ConfiguredAirbyteCatalog(streams=[overwrite_stream]) + + def _state(self, data: Dict[str, Any]) -> AirbyteMessage: + return AirbyteMessage(type=Type.STATE, state=AirbyteStateMessage(data=data)) + + def _record(self, stream: str, str_value: str, int_value: int) -> AirbyteMessage: + return AirbyteMessage( + type=Type.RECORD, record=AirbyteRecordMessage(stream=stream, data={"str_col": str_value, "int_col": int_value}, emitted_at=0) + ) + def _clean(self): + self._client.delete_doc_by_metadata(metadata_field_name="_ab_stream", metadata_field_values=["None_mystream"]) + + def setUp(self): + with open("secrets/config.json", "r") as f: + self.config = json.loads(f.read()) + self._client = VectaraClient(self.config) + self._clean() + + def tearDown(self): + self._clean() + + def test_check_valid_config(self): + outcome = DestinationVectara().check(logging.getLogger("airbyte"), self.config) + assert outcome.status == Status.SUCCEEDED + + def test_check_invalid_config(self): + outcome = DestinationVectara().check( + logging.getLogger("airbyte"), + { + "oauth2": {"client_id": "myclientid", "client_secret": "myclientsecret"}, + "corpus_name": "teststore", + "customer_id": "123456", + "text_fields": [], + "metadata_fields": [], + "title_field": "", + }, + ) + assert outcome.status == Status.FAILED + + def _query_index(self, query="Everything", num_results=100): + return self._client._request( + "query", + data={ + "query": [ + { + "query": query, + "numResults": num_results, + "corpusKey": [ + { + "customerId": self._client.customer_id, + "corpusId": self._client.corpus_id, + } + ], + } + ] + }, + )["responseSet"][0] + + def test_write(self): + # validate corpus starts empty + initial_result = self._query_index()["document"] + assert len(initial_result) == 0 + + catalog = self._get_configured_catalog(DestinationSyncMode.overwrite) + first_state_message = self._state({"state": "1"}) + first_record_chunk = [self._record("mystream", f"Dogs are number {i}", i) for i in range(5)] + + # initial sync + destination = DestinationVectara() + list(destination.write(self.config, catalog, [*first_record_chunk, first_state_message])) + assert len(self._query_index()["document"]) == 5 + + # incrementalally update a doc + incremental_catalog = self._get_configured_catalog(DestinationSyncMode.append_dedup) + list(destination.write(self.config, incremental_catalog, [self._record("mystream", "Cats are nice", 2), first_state_message])) + assert len(self._query_index()["document"]) == 5 + + # use semantic search + result = 
self._query_index("Feline animals", 1) + assert result["document"] == [ + { + "id": "Stream_None_mystream_Key_None_mystream_2", + "metadata": [ + {"name": "int_col", "value": "2"}, + {"name": "_ab_stream", "value": "None_mystream"}, + {"name": "title", "value": "Cats are nice"}, + ], + } + ] diff --git a/airbyte-integrations/connectors/destination-vectara/main.py b/airbyte-integrations/connectors/destination-vectara/main.py new file mode 100644 index 0000000..289b411 --- /dev/null +++ b/airbyte-integrations/connectors/destination-vectara/main.py @@ -0,0 +1,11 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +import sys + +from destination_vectara import DestinationVectara + +if __name__ == "__main__": + DestinationVectara().run(sys.argv[1:]) diff --git a/airbyte-integrations/connectors/destination-vectara/metadata.yaml b/airbyte-integrations/connectors/destination-vectara/metadata.yaml new file mode 100644 index 0000000..eed0bec --- /dev/null +++ b/airbyte-integrations/connectors/destination-vectara/metadata.yaml @@ -0,0 +1,26 @@ +data: + allowedHosts: + hosts: + - api.vectara.io + - "vectara-prod-${self.customer_id}.auth.us-west-2.amazoncognito.com" + registries: + oss: + enabled: true + cloud: + enabled: true + connectorSubtype: database + connectorType: destination + definitionId: 102900e7-a236-4c94-83e4-a4189b99adc2 + dockerImageTag: 0.2.0 + dockerRepository: airbyte/destination-vectara + githubIssueLabel: destination-vectara + icon: vectara.svg + license: MIT + name: Vectara + releaseDate: 2023-12-16 + releaseStage: alpha + supportLevel: community + documentationUrl: https://docs.airbyte.com/integrations/destinations/vectara + tags: + - language:python +metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/destination-vectara/requirements.txt b/airbyte-integrations/connectors/destination-vectara/requirements.txt new file mode 100644 index 0000000..d6e1198 --- /dev/null +++ b/airbyte-integrations/connectors/destination-vectara/requirements.txt @@ -0,0 +1 @@ +-e . diff --git a/airbyte-integrations/connectors/destination-vectara/setup.py b/airbyte-integrations/connectors/destination-vectara/setup.py new file mode 100644 index 0000000..ab10a8c --- /dev/null +++ b/airbyte-integrations/connectors/destination-vectara/setup.py @@ -0,0 +1,25 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+# + + +from setuptools import find_packages, setup + +MAIN_REQUIREMENTS = [ + "airbyte-cdk==0.57.8", +] + +TEST_REQUIREMENTS = ["pytest~=6.2"] + +setup( + name="destination_vectara", + description="Destination implementation for Vectara.", + author="Airbyte", + author_email="contact@airbyte.io", + packages=find_packages(), + install_requires=MAIN_REQUIREMENTS, + package_data={"": ["*.json"]}, + extras_require={ + "tests": TEST_REQUIREMENTS, + }, +) diff --git a/airbyte-integrations/connectors/destination-vectara/unit_tests/__init__.py b/airbyte-integrations/connectors/destination-vectara/unit_tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/airbyte-integrations/connectors/destination-kvdb/.dockerignore b/airbyte-integrations/connectors/destination-xata/.dockerignore similarity index 64% rename from airbyte-integrations/connectors/destination-kvdb/.dockerignore rename to airbyte-integrations/connectors/destination-xata/.dockerignore index 1b4b576..4037059 100644 --- a/airbyte-integrations/connectors/destination-kvdb/.dockerignore +++ b/airbyte-integrations/connectors/destination-xata/.dockerignore @@ -1,5 +1,5 @@ * !Dockerfile !main.py -!destination_kvdb +!destination_xata !setup.py diff --git a/airbyte-integrations/connectors/destination-xata/Dockerfile b/airbyte-integrations/connectors/destination-xata/Dockerfile new file mode 100644 index 0000000..a2ac681 --- /dev/null +++ b/airbyte-integrations/connectors/destination-xata/Dockerfile @@ -0,0 +1,38 @@ +FROM python:3.9.11-alpine3.15 as base + +# build and load all requirements +FROM base as builder +WORKDIR /airbyte/integration_code + +# upgrade pip to the latest version +RUN apk --no-cache upgrade \ + && pip install --upgrade pip \ + && apk --no-cache add tzdata build-base + + +COPY setup.py ./ +# install necessary packages to a temporary folder +RUN pip install --prefix=/install . + +# build a clean environment +FROM base +WORKDIR /airbyte/integration_code + +# copy all loaded and built libraries to a pure basic image +COPY --from=builder /install /usr/local +# add default timezone settings +COPY --from=builder /usr/share/zoneinfo/Etc/UTC /etc/localtime +RUN echo "Etc/UTC" > /etc/timezone + +# bash is installed for more convenient debugging. +RUN apk --no-cache add bash + +# copy payload code only +COPY main.py ./ +COPY destination_xata ./destination_xata + +ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" +ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] + +LABEL io.airbyte.version=0.1.1 +LABEL io.airbyte.name=airbyte/destination-xata diff --git a/airbyte-integrations/connectors/destination-xata/README.md b/airbyte-integrations/connectors/destination-xata/README.md new file mode 100644 index 0000000..e6153ac --- /dev/null +++ b/airbyte-integrations/connectors/destination-xata/README.md @@ -0,0 +1,99 @@ +# Xata Destination + +This is the repository for the Xata destination connector, written in Python. +For information about how to use this connector within Airbyte, see [the documentation](https://docs.airbyte.com/integrations/destinations/xata). + +## Local development + +### Prerequisites +**To iterate on this connector, make sure to complete this prerequisites section.** + +#### Minimum Python version required `= 3.7.0` + +#### Build & Activate Virtual Environment and install dependencies +From this connector directory, create a virtual environment: +``` +python -m venv .venv +``` + +This will generate a virtualenv for this module in `.venv/`. 
Make sure this venv is active in your +development environment of choice. To activate it from the terminal, run: +``` +source .venv/bin/activate +pip install -r requirements.txt +``` +If you are in an IDE, follow your IDE's instructions to activate the virtualenv. + +Note that while we are installing dependencies from `requirements.txt`, you should only edit `setup.py` for your dependencies. `requirements.txt` is +used for editable installs (`pip install -e`) to pull in Python dependencies from the monorepo and will call `setup.py`. +If this is mumbo jumbo to you, don't worry about it, just put your deps in `setup.py` but install using `pip install -r requirements.txt` and everything +should work as you expect. + +#### Create credentials +**If you are a community contributor**, follow the instructions in the [documentation](https://docs.airbyte.com/integrations/destinations/xata) +to generate the necessary credentials. Then create a file `secrets/config.json` conforming to the `destination_xata/spec.json` file. +Note that the `secrets` directory is gitignored by default, so there is no danger of accidentally checking in sensitive information. +See `integration_tests/sample_config.json` for a sample config file. + +**If you are an Airbyte core member**, copy the credentials in Lastpass under the secret name `destination xata test creds` +and place them into `secrets/config.json`. + +### Locally running the connector +``` +python main.py spec +python main.py check --config secrets/config.json +python main.py discover --config secrets/config.json +python main.py read --config secrets/config.json --catalog integration_tests/configured_catalog.json +``` + +### Locally running the connector docker image + + +#### Build +**Via [`airbyte-ci`](https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/pipelines/README.md) (recommended):** +```bash +airbyte-ci connectors --name=destination-xata build +``` + +An image will be built with the tag `airbyte/destination-xata:dev`. + +**Via `docker build`:** +```bash +docker build -t airbyte/destination-xata:dev . +``` + +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/destination-xata:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-xata:dev check --config /secrets/config.json +# messages.jsonl is a file containing line-separated JSON representing AirbyteMessages +cat messages.jsonl | docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/destination-xata:dev write --config /secrets/config.json --catalog /integration_tests/configured_catalog.json +``` + +## Testing +You can run our full test suite locally using [`airbyte-ci`](https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/pipelines/README.md): +```bash +airbyte-ci connectors --name=destination-xata test +``` + +### Customizing acceptance Tests +Customize `acceptance-test-config.yml` file to configure tests. See [Connector Acceptance Tests](https://docs.airbyte.com/connector-development/testing-connectors/connector-acceptance-tests-reference) for more information. +If your connector requires to create or destroy resources for use during acceptance tests create fixtures for it and place them inside integration_tests/acceptance.py. + +## Dependency Management +All of your dependencies should go in `setup.py`, NOT `requirements.txt`. The requirements file is only used to connect internal Airbyte dependencies in the monorepo for local development. 
+We split dependencies between two groups, dependencies that are:
+* required for your connector to work, which go in the `MAIN_REQUIREMENTS` list.
+* required for testing, which go in the `TEST_REQUIREMENTS` list.
+
+### Publishing a new version of the connector
+You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what?
+1. Make sure your changes are passing our test suite: `airbyte-ci connectors --name=destination-xata test`
+2. Bump the connector version in `metadata.yaml`: increment the `dockerImageTag` value. Please follow [semantic versioning for connectors](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#semantic-versioning-for-connectors).
+3. Make sure the `metadata.yaml` content is up to date.
+4. Make sure the connector documentation and its changelog are up to date (`docs/integrations/destinations/xata.md`).
+5. Create a Pull Request: use [our PR naming conventions](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#pull-request-title-convention).
+6. Pat yourself on the back for being an awesome contributor.
+7. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master.
+
diff --git a/airbyte-integrations/connectors/destination-xata/bootstrap.md b/airbyte-integrations/connectors/destination-xata/bootstrap.md
new file mode 100644
index 0000000..bac35e3
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-xata/bootstrap.md
@@ -0,0 +1 @@
+# Xata Destination Connector
diff --git a/airbyte-integrations/connectors/destination-xata/destination_xata/__init__.py b/airbyte-integrations/connectors/destination-xata/destination_xata/__init__.py
new file mode 100644
index 0000000..d030799
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-xata/destination_xata/__init__.py
@@ -0,0 +1,8 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+
+
+from .destination import DestinationXata
+
+__all__ = ["DestinationXata"]
diff --git a/airbyte-integrations/connectors/destination-xata/destination_xata/destination.py b/airbyte-integrations/connectors/destination-xata/destination_xata/destination.py
new file mode 100644
index 0000000..a9698c4
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-xata/destination_xata/destination.py
@@ -0,0 +1,79 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+
+import logging
+from typing import Any, Iterable, Mapping
+
+from airbyte_cdk import AirbyteLogger
+from airbyte_cdk.destinations import Destination
+from airbyte_cdk.models import AirbyteConnectionStatus, AirbyteMessage, ConfiguredAirbyteCatalog, Status, Type
+from xata.client import XataClient
+from xata.helpers import BulkProcessor
+
+__version__ = "0.0.1"
+
+logger = logging.getLogger("airbyte")
+
+
+class DestinationXata(Destination):
+    def write(
+        self, config: Mapping[str, Any], configured_catalog: ConfiguredAirbyteCatalog, input_messages: Iterable[AirbyteMessage]
+    ) -> Iterable[AirbyteMessage]:
+        """
+        Reads the input stream of messages, config, and catalog to write data to the destination.
+
+        This method returns an iterable (typically a generator of AirbyteMessages via yield) containing state messages received
+        in the input message stream. Outputting a state message means that every AirbyteRecordMessage which came before it has been
+        successfully persisted to the destination.
This is used to ensure fault tolerance in the case that a sync fails before fully completing, + then the source is given the last state message output from this method as the starting point of the next sync. + + :param config: dict of JSON configuration matching the configuration declared in spec.json + :param configured_catalog: The Configured Catalog describing the schema of the data being received and how it should be persisted in the + destination + :param input_messages: The stream of input messages received from the source + :return: Iterable of AirbyteStateMessages wrapped in AirbyteMessage structs + """ + + xata = XataClient(api_key=config["api_key"], db_url=config["db_url"]) + xata.set_header("user-agent", f"airbyte/destination-xata:{__version__}") + + bp = BulkProcessor(xata) + count = 0 + for message in input_messages: + if message.type == Type.RECORD: + # Put record to processing queue + bp.put_record(message.record.stream, message.record.data) + count += 1 + if message.type == Type.STATE: + yield message + bp.flush_queue() + logger.info(bp.get_stats()) + if count != bp.get_stats()["total"] or bp.get_stats()["failed_batches"] != 0: + raise Exception( + "inconsistency found, expected %d records pushed, actual: %d with %d failures." + % (count, bp.get_stats()["total"], bp.get_stats()["failed_batches"]) + ) + + def check(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> AirbyteConnectionStatus: + """ + Tests if the input configuration can be used to successfully connect to the destination with the needed permissions + e.g: if a provided API token or password can be used to connect and write to the destination. + + :param logger: Logging object to display debug/info/error to the logs + (logs will not be accessible via airbyte UI if they are not passed to this logger) + :param config: Json object containing the configuration of this destination, content of this json is as specified in + the properties of the spec.json file + + :return: AirbyteConnectionStatus indicating a Success or Failure + """ + try: + xata = XataClient(api_key=config["api_key"], db_url=config["db_url"]) + xata.set_header("user-agent", f"airbyte/destination-xata:{__version__}") + + r = xata.users().getUser() + if r.status_code != 200: + raise Exception("Invalid connection parameters.") + return AirbyteConnectionStatus(status=Status.SUCCEEDED) + except Exception as e: + return AirbyteConnectionStatus(status=Status.FAILED, message=f"An exception occurred: {repr(e)}") diff --git a/airbyte-integrations/connectors/destination-xata/destination_xata/spec.json b/airbyte-integrations/connectors/destination-xata/destination_xata/spec.json new file mode 100644 index 0000000..6e73b6c --- /dev/null +++ b/airbyte-integrations/connectors/destination-xata/destination_xata/spec.json @@ -0,0 +1,28 @@ +{ + "documentationUrl": "https://docs.airbyte.com/integrations/destinations/xata", + "supported_destination_sync_modes": ["append"], + "supportsIncremental": false, + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Destination Xata", + "type": "object", + "required": ["api_key", "db_url"], + "additionalProperties": true, + "properties": { + "api_key": { + "title": "API Key", + "description": "API Key to connect.", + "type": "string", + "order": 0, + "airbyte_secret": true + }, + "db_url": { + "title": "Database URL", + "description": "URL pointing to your workspace.", + "type": "string", + "order": 1, + "example": 
"https://my-workspace-abc123.us-east-1.xata.sh/db/nyc-taxi-fares:main" + } + } + } +} diff --git a/airbyte-integrations/connectors/destination-xata/icon.svg b/airbyte-integrations/connectors/destination-xata/icon.svg new file mode 100644 index 0000000..8950b35 --- /dev/null +++ b/airbyte-integrations/connectors/destination-xata/icon.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/airbyte-integrations/connectors/destination-xata/integration_tests/integration_test.py b/airbyte-integrations/connectors/destination-xata/integration_tests/integration_test.py new file mode 100644 index 0000000..b98d151 --- /dev/null +++ b/airbyte-integrations/connectors/destination-xata/integration_tests/integration_test.py @@ -0,0 +1,120 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + +import json +from typing import Any, Mapping +from unittest.mock import Mock + +import pytest +from airbyte_cdk.models import ( + AirbyteMessage, + AirbyteRecordMessage, + AirbyteStream, + ConfiguredAirbyteCatalog, + ConfiguredAirbyteStream, + DestinationSyncMode, + Status, + SyncMode, + Type, +) +from destination_xata import DestinationXata +from xata.client import XataClient + + +@pytest.fixture(name="config") +def config_fixture() -> Mapping[str, Any]: + with open("secrets/config.json", "r") as f: + return json.loads(f.read()) + + +@pytest.fixture(name="configured_catalog") +def configured_catalog_fixture() -> ConfiguredAirbyteCatalog: + stream_schema = {"type": "object", "properties": {"string_col": {"type": "str"}, "int_col": {"type": "integer"}}} + + append_stream = ConfiguredAirbyteStream( + stream=AirbyteStream(name="append_stream", json_schema=stream_schema, supported_sync_modes=[SyncMode.incremental]), + sync_mode=SyncMode.incremental, + destination_sync_mode=DestinationSyncMode.append, + ) + # TODO implement overwrite + """ + overwrite_stream = ConfiguredAirbyteStream( + stream=AirbyteStream(name="overwrite_stream", json_schema=stream_schema, supported_sync_modes=[SyncMode.incremental]), + sync_mode=SyncMode.incremental, + destination_sync_mode=DestinationSyncMode.overwrite, + ) + """ + return ConfiguredAirbyteCatalog(streams=[append_stream]) + + +def test_check_valid_config(config: Mapping): + outcome = DestinationXata().check(logger=Mock(), config=config) + assert outcome.status == Status.SUCCEEDED + + +def test_check_invalid_config(): + f = open("integration_tests/invalid_config.json") + config = json.load(f) + outcome = DestinationXata().check(logger=Mock(), config=config) + assert outcome.status == Status.FAILED + + +def test_write(config: Mapping): + test_schema = {"type": "object", "properties": {"str_col": {"type": "str"}, "int_col": {"type": "integer"}}} + + test_stream = ConfiguredAirbyteStream( + stream=AirbyteStream(name="test_stream", json_schema=test_schema, supported_sync_modes=[SyncMode.incremental]), + sync_mode=SyncMode.incremental, + destination_sync_mode=DestinationSyncMode.append, + ) + + records = [ + AirbyteMessage( + type=Type.RECORD, + record=AirbyteRecordMessage( + stream="test_stream", + data={ + "str_col": "example", + "int_col": 1, + }, + emitted_at=0, + ), + ) + ] + + # setup Xata workspace + xata = XataClient(api_key=config["api_key"], db_url=config["db_url"]) + db_name = xata.get_config()["dbName"] + # database exists ? + assert xata.databases().getDatabaseMetadata(db_name).status_code == 200, f"database '{db_name}' does not exist." 
+ assert xata.table().createTable("test_stream").status_code == 201, "could not create table, if it already exists, please delete it." + assert ( + xata.table() + .setTableSchema( + "test_stream", + { + "columns": [ + {"name": "str_col", "type": "string"}, + {"name": "int_col", "type": "int"}, + ] + }, + ) + .status_code + == 200 + ), "failed to set table schema" + + dest = DestinationXata() + list(dest.write(config=config, configured_catalog=test_stream, input_messages=records)) + + # fetch record + records = xata.data().queryTable("test_stream", {}) + assert records.status_code == 200 + assert len(records.json()["records"]) == 1 + + proof = records.json()["records"][0] + assert proof["str_col"] == "example" + assert proof["int_col"] == 1 + + # cleanup + assert xata.table().deleteTable("test_stream").status_code == 200 diff --git a/airbyte-integrations/connectors/destination-xata/integration_tests/invalid_config.json b/airbyte-integrations/connectors/destination-xata/integration_tests/invalid_config.json new file mode 100644 index 0000000..36bd35a --- /dev/null +++ b/airbyte-integrations/connectors/destination-xata/integration_tests/invalid_config.json @@ -0,0 +1,4 @@ +{ + "api_key": "husenvasen", + "database_url": "https://invalid" +} diff --git a/airbyte-integrations/connectors/destination-xata/main.py b/airbyte-integrations/connectors/destination-xata/main.py new file mode 100644 index 0000000..76e7d8f --- /dev/null +++ b/airbyte-integrations/connectors/destination-xata/main.py @@ -0,0 +1,11 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +import sys + +from destination_xata import DestinationXata + +if __name__ == "__main__": + DestinationXata().run(sys.argv[1:]) diff --git a/airbyte-integrations/connectors/destination-xata/metadata.yaml b/airbyte-integrations/connectors/destination-xata/metadata.yaml new file mode 100644 index 0000000..cdb6752 --- /dev/null +++ b/airbyte-integrations/connectors/destination-xata/metadata.yaml @@ -0,0 +1,24 @@ +data: + registries: + cloud: + enabled: false + oss: + enabled: false + connectorSubtype: database + connectorType: destination + definitionId: 2a51c92d-0fb4-4e54-94d2-cce631f24d1f + dockerImageTag: 0.1.1 + dockerRepository: airbyte/destination-xata + githubIssueLabel: destination-xata + icon: xata.svg + license: MIT + name: Xata + releaseStage: alpha + documentationUrl: https://docs.airbyte.com/integrations/destinations/xata + tags: + - language:python + ab_internal: + sl: 100 + ql: 100 + supportLevel: archived +metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/destination-xata/requirements.txt b/airbyte-integrations/connectors/destination-xata/requirements.txt new file mode 100644 index 0000000..d6e1198 --- /dev/null +++ b/airbyte-integrations/connectors/destination-xata/requirements.txt @@ -0,0 +1 @@ +-e . 
diff --git a/airbyte-integrations/connectors/destination-xata/sample_files/configured_catalog.json b/airbyte-integrations/connectors/destination-xata/sample_files/configured_catalog.json new file mode 100644 index 0000000..f526611 --- /dev/null +++ b/airbyte-integrations/connectors/destination-xata/sample_files/configured_catalog.json @@ -0,0 +1,13 @@ +{ + "streams": [ + { + "stream": { + "name": "issues", + "json_schema": {}, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "append" + } + ] +} diff --git a/airbyte-integrations/connectors/destination-xata/setup.py b/airbyte-integrations/connectors/destination-xata/setup.py new file mode 100644 index 0000000..5fcb33e --- /dev/null +++ b/airbyte-integrations/connectors/destination-xata/setup.py @@ -0,0 +1,23 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +from setuptools import find_packages, setup + +MAIN_REQUIREMENTS = ["airbyte-cdk", "xata==0.10.1"] + +TEST_REQUIREMENTS = ["pytest~=6.2"] + +setup( + name="destination_xata", + description="Destination implementation for Xata.io", + author="Philip Krauss ", + author_email="support@xata.io", + packages=find_packages(), + install_requires=MAIN_REQUIREMENTS, + package_data={"": ["*.json"]}, + extras_require={ + "tests": TEST_REQUIREMENTS, + }, +) diff --git a/airbyte-integrations/connectors/destination-xata/unit_tests/unit_test.py b/airbyte-integrations/connectors/destination-xata/unit_tests/unit_test.py new file mode 100644 index 0000000..5172624 --- /dev/null +++ b/airbyte-integrations/connectors/destination-xata/unit_tests/unit_test.py @@ -0,0 +1,28 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + +import unittest + +from xata.client import XataClient +from xata.helpers import BulkProcessor + + +class DestinationConnectorXataTestCase(unittest.TestCase): + def test_request(self): + xata = XataClient(db_url="https://unit_tests-mock.results-store.xata.sh/db/mock-db", api_key="mock-key") + bp = BulkProcessor(xata, thread_pool_size=1, batch_size=2, flush_interval=1) + stats = bp.get_stats() + + assert "total" in stats + assert "queue" in stats + assert "failed_batches" in stats + assert "tables" in stats + + assert stats["total"] == 0 + assert stats["queue"] == 0 + assert stats["failed_batches"] == 0 + + +if __name__ == "__main__": + unittest.main() diff --git a/airbyte-integrations/connectors/destination-yugabytedb/README.md b/airbyte-integrations/connectors/destination-yugabytedb/README.md new file mode 100644 index 0000000..cf5c9b9 --- /dev/null +++ b/airbyte-integrations/connectors/destination-yugabytedb/README.md @@ -0,0 +1,72 @@ +# Destination Yugabytedb + +This is the repository for the Yugabytedb destination connector in Java. +For information about how to use this connector within Airbyte, see [the User Documentation](https://docs.airbyte.io/integrations/destinations/yugabytedb). + +## Local development + +#### Building via Gradle +From the Airbyte repository root, run: +``` +./gradlew :airbyte-integrations:connectors:destination-yugabytedb:build +``` + +#### Create credentials +**If you are a community contributor**, generate the necessary credentials and place them in `secrets/config.json` conforming to the spec file in `src/main/resources/spec.json`. +Note that the `secrets` directory is git-ignored by default, so there is no danger of accidentally checking in sensitive information. 
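+
+For local testing you can spin up the single-node cluster defined in this connector's `docker-compose.yml` and point the connector at it. A minimal `secrets/config.json` could then look like the sketch below (placeholder values; the full set of fields is declared in `src/main/resources/spec.json`):
+```
+{
+  "host": "localhost",
+  "port": 5433,
+  "database": "yugabyte",
+  "username": "yugabyte",
+  "password": "yugabyte",
+  "schema": "public"
+}
+```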
+
+**If you are an Airbyte core member**, follow the [instructions](https://docs.airbyte.io/connector-development#using-credentials-in-ci) to set up the credentials.
+
+### Locally running the connector docker image
+
+#### Build
+Build the connector image via Gradle:
+
+```
+./gradlew :airbyte-integrations:connectors:destination-yugabytedb:buildConnectorImage
+```
+Once built, the docker image name and tag on your host will be `airbyte/destination-yugabytedb:dev`.
+
+#### Run
+Then run any of the connector commands as follows:
+```
+docker run --rm airbyte/destination-yugabytedb:dev spec
+docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-yugabytedb:dev check --config /secrets/config.json
+docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-yugabytedb:dev discover --config /secrets/config.json
+docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/destination-yugabytedb:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json
+```
+
+## Testing
+We use `JUnit` for Java tests.
+
+### Unit and Integration Tests
+Place unit tests under `src/test/java/io/airbyte/integrations/destination/yugabytedb`.
+
+#### Acceptance Tests
+Airbyte has a standard test suite that all destination connectors must pass. Implement the `TODO`s in
+`src/test-integration/java/io/airbyte/integrations/destination/yugabytedb/YugabytedbDestinationAcceptanceTest.java`.
+
+### Using Gradle to run tests
+All commands should be run from the Airbyte repository root.
+To run unit tests:
+```
+./gradlew :airbyte-integrations:connectors:destination-yugabytedb:unitTest
+```
+To run acceptance and custom integration tests:
+```
+./gradlew :airbyte-integrations:connectors:destination-yugabytedb:integrationTest
+```
+
+## Dependency Management
+
+### Publishing a new version of the connector
+You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what?
+1. Make sure your changes are passing our test suite: `airbyte-ci connectors --name=destination-yugabytedb test`
+2. Bump the connector version in `metadata.yaml`: increment the `dockerImageTag` value. Please follow [semantic versioning for connectors](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#semantic-versioning-for-connectors).
+3. Make sure the `metadata.yaml` content is up to date.
+4. Make sure the connector documentation and its changelog are up to date (`docs/integrations/destinations/yugabytedb.md`).
+5. Create a Pull Request: use [our PR naming conventions](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#pull-request-title-convention).
+6. Pat yourself on the back for being an awesome contributor.
+7. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master.
+ diff --git a/airbyte-integrations/connectors/destination-yugabytedb/bootstrap.md b/airbyte-integrations/connectors/destination-yugabytedb/bootstrap.md new file mode 100644 index 0000000..e69de29 diff --git a/airbyte-integrations/connectors/destination-yugabytedb/build.gradle b/airbyte-integrations/connectors/destination-yugabytedb/build.gradle new file mode 100644 index 0000000..2186a1b --- /dev/null +++ b/airbyte-integrations/connectors/destination-yugabytedb/build.gradle @@ -0,0 +1,33 @@ +plugins { + id 'application' + id 'airbyte-java-connector' +} + +airbyteJavaConnector { + cdkVersionRequired = '0.2.0' + features = ['db-destinations'] + useLocalCdk = false +} + +//remove once upgrading the CDK version to 0.4.x or later +java { + compileJava { + options.compilerArgs.remove("-Werror") + } +} + +airbyteJavaConnector.addCdkDependencies() + +application { + mainClass = 'io.airbyte.integrations.destination.yugabytedb.YugabytedbDestination' +} + +dependencies { + + implementation 'com.yugabyte:jdbc-yugabytedb:42.3.5-yb-1' + + testImplementation "org.assertj:assertj-core:3.21.0" + testImplementation "org.junit.jupiter:junit-jupiter:5.8.1" + testImplementation "org.testcontainers:junit-jupiter:1.17.5" + testImplementation "org.testcontainers:jdbc:1.17.5" +} diff --git a/airbyte-integrations/connectors/destination-yugabytedb/docker-compose.yml b/airbyte-integrations/connectors/destination-yugabytedb/docker-compose.yml new file mode 100644 index 0000000..d876335 --- /dev/null +++ b/airbyte-integrations/connectors/destination-yugabytedb/docker-compose.yml @@ -0,0 +1,36 @@ +version: "3" + +# Note: add mount points at /mnt/master and /mnt/tserver for persistence + +services: + yb-master: + image: yugabytedb/yugabyte:latest + container_name: yb-master-n1 + command: + [ + "/home/yugabyte/bin/yb-master", + "--fs_data_dirs=/mnt/master", + "--master_addresses=yb-master-n1:7100", + "--rpc_bind_addresses=yb-master-n1:7100", + "--replication_factor=1", + ] + ports: + - "7000:7000" + + yb-tserver: + image: yugabytedb/yugabyte:latest + container_name: yb-tserver-n1 + command: + [ + "/home/yugabyte/bin/yb-tserver", + "--fs_data_dirs=/mnt/tserver", + "--start_pgsql_proxy", + "--rpc_bind_addresses=yb-tserver-n1:9100", + "--tserver_master_addrs=yb-master-n1:7100", + ] + ports: + - "9042:9042" + - "5433:5433" + - "9000:9000" + depends_on: + - yb-master diff --git a/airbyte-integrations/connectors/destination-yugabytedb/icon.svg b/airbyte-integrations/connectors/destination-yugabytedb/icon.svg new file mode 100644 index 0000000..e02cbf7 --- /dev/null +++ b/airbyte-integrations/connectors/destination-yugabytedb/icon.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/airbyte-integrations/connectors/destination-yugabytedb/metadata.yaml b/airbyte-integrations/connectors/destination-yugabytedb/metadata.yaml new file mode 100644 index 0000000..a6bba34 --- /dev/null +++ b/airbyte-integrations/connectors/destination-yugabytedb/metadata.yaml @@ -0,0 +1,24 @@ +data: + connectorSubtype: database + connectorType: destination + definitionId: 2300fdcf-a532-419f-9f24-a014336e7966 + dockerImageTag: 0.1.1 + dockerRepository: airbyte/destination-yugabytedb + githubIssueLabel: destination-yugabytedb + icon: yugabytedb.svg + license: MIT + name: YugabyteDB + registries: + cloud: + enabled: false + oss: + enabled: false + releaseStage: alpha + documentationUrl: https://docs.airbyte.com/integrations/destinations/yugabytedb + tags: + - language:java + ab_internal: + sl: 100 + ql: 100 + supportLevel: archived 
+metadataSpecVersion: "1.0" diff --git a/airbyte-integrations/connectors/destination-yugabytedb/src/main/java/io/airbyte/integrations/destination/yugabytedb/YugabytedbDestination.java b/airbyte-integrations/connectors/destination-yugabytedb/src/main/java/io/airbyte/integrations/destination/yugabytedb/YugabytedbDestination.java new file mode 100644 index 0000000..2ae3fc7 --- /dev/null +++ b/airbyte-integrations/connectors/destination-yugabytedb/src/main/java/io/airbyte/integrations/destination/yugabytedb/YugabytedbDestination.java @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.yugabytedb; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import io.airbyte.cdk.db.factory.DatabaseDriver; +import io.airbyte.cdk.db.jdbc.JdbcUtils; +import io.airbyte.cdk.integrations.base.IntegrationRunner; +import io.airbyte.cdk.integrations.destination.jdbc.AbstractJdbcDestination; +import io.airbyte.commons.json.Jsons; +import java.util.Collections; +import java.util.Map; +import java.util.Optional; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class YugabytedbDestination extends AbstractJdbcDestination { + + private static final Logger LOGGER = LoggerFactory.getLogger(YugabytedbDestination.class); + + public static final String DRIVER_CLASS = DatabaseDriver.YUGABYTEDB.getDriverClassName(); + + public YugabytedbDestination() { + super(DRIVER_CLASS, new YugabytedbNamingTransformer(), new YugabytedbSqlOperations()); + } + + public static void main(String[] args) throws Exception { + LOGGER.info("starting destination: {}", YugabytedbDestination.class); + new IntegrationRunner(new YugabytedbDestination()).run(args); + LOGGER.info("completed destination: {}", YugabytedbDestination.class); + } + + @Override + protected Map getDefaultConnectionProperties(JsonNode config) { + return Collections.emptyMap(); + } + + @Override + public JsonNode toJdbcConfig(JsonNode config) { + String schema = + Optional.ofNullable(config.get(JdbcUtils.SCHEMA_KEY)).map(JsonNode::asText).orElse("public"); + + String jdbcUrl = "jdbc:yugabytedb://" + config.get(JdbcUtils.HOST_KEY).asText() + ":" + + config.get(JdbcUtils.PORT_KEY).asText() + "/" + + config.get(JdbcUtils.DATABASE_KEY).asText(); + + ImmutableMap.Builder configBuilder = ImmutableMap.builder() + .put(JdbcUtils.USERNAME_KEY, config.get(JdbcUtils.USERNAME_KEY).asText()) + .put(JdbcUtils.JDBC_URL_KEY, jdbcUrl) + .put(JdbcUtils.SCHEMA_KEY, schema); + + if (config.has(JdbcUtils.PASSWORD_KEY)) { + configBuilder.put(JdbcUtils.PASSWORD_KEY, config.get(JdbcUtils.PASSWORD_KEY).asText()); + } + + if (config.has(JdbcUtils.JDBC_URL_PARAMS_KEY)) { + configBuilder.put(JdbcUtils.JDBC_URL_PARAMS_KEY, config.get(JdbcUtils.JDBC_URL_PARAMS_KEY).asText()); + } + + return Jsons.jsonNode(configBuilder.build()); + } + +} diff --git a/airbyte-integrations/connectors/destination-yugabytedb/src/main/java/io/airbyte/integrations/destination/yugabytedb/YugabytedbNamingTransformer.java b/airbyte-integrations/connectors/destination-yugabytedb/src/main/java/io/airbyte/integrations/destination/yugabytedb/YugabytedbNamingTransformer.java new file mode 100644 index 0000000..2485c77 --- /dev/null +++ b/airbyte-integrations/connectors/destination-yugabytedb/src/main/java/io/airbyte/integrations/destination/yugabytedb/YugabytedbNamingTransformer.java @@ -0,0 +1,16 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.yugabytedb; + +import io.airbyte.cdk.integrations.destination.StandardNameTransformer; + +public class YugabytedbNamingTransformer extends StandardNameTransformer { + + @Override + public String applyDefaultCase(final String input) { + return input.toLowerCase(); + } + +} diff --git a/airbyte-integrations/connectors/destination-yugabytedb/src/main/java/io/airbyte/integrations/destination/yugabytedb/YugabytedbSqlOperations.java b/airbyte-integrations/connectors/destination-yugabytedb/src/main/java/io/airbyte/integrations/destination/yugabytedb/YugabytedbSqlOperations.java new file mode 100644 index 0000000..bb876f8 --- /dev/null +++ b/airbyte-integrations/connectors/destination-yugabytedb/src/main/java/io/airbyte/integrations/destination/yugabytedb/YugabytedbSqlOperations.java @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.yugabytedb; + +import com.yugabyte.copy.CopyManager; +import com.yugabyte.core.BaseConnection; +import io.airbyte.cdk.db.jdbc.JdbcDatabase; +import io.airbyte.cdk.integrations.destination.jdbc.JdbcSqlOperations; +import io.airbyte.protocol.models.v0.AirbyteRecordMessage; +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.io.UncheckedIOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.util.List; + +public class YugabytedbSqlOperations extends JdbcSqlOperations { + + @Override + protected void insertRecordsInternal(JdbcDatabase database, + List records, + String schemaName, + String tableName) + throws Exception { + + if (records.isEmpty()) { + return; + } + + File tempFile = null; + try { + tempFile = Files.createTempFile(tableName + "-", ".tmp").toFile(); + writeBatchToFile(tempFile, records); + + File finalTempFile = tempFile; + database.execute(connection -> { + + var copyManager = new CopyManager(connection.unwrap(BaseConnection.class)); + var sql = String.format("COPY %s.%s FROM STDIN DELIMITER ',' CSV", schemaName, tableName); + + try (var bufferedReader = new BufferedReader(new FileReader(finalTempFile, StandardCharsets.UTF_8))) { + copyManager.copyIn(sql, bufferedReader); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + }); + } finally { + if (tempFile != null) { + Files.delete(tempFile.toPath()); + } + } + } + +} diff --git a/airbyte-integrations/connectors/destination-yugabytedb/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-yugabytedb/src/main/resources/spec.json new file mode 100644 index 0000000..d9861ba --- /dev/null +++ b/airbyte-integrations/connectors/destination-yugabytedb/src/main/resources/spec.json @@ -0,0 +1,65 @@ +{ + "documentationUrl": "https://docs.airbyte.io/integrations/destinations/yugabytedb", + "supportsIncremental": true, + "supportsNormalization": false, + "supportsDBT": false, + "supported_destination_sync_modes": ["overwrite", "append"], + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Yugabytedb destination spec", + "type": "object", + "required": ["host", "port", "username", "database", "schema"], + "additionalProperties": true, + "properties": { + "host": { + "title": "Host", + "description": "The Hostname of the database.", + "type": "string", + "order": 0 + }, + "port": { + "title": "Port", + "description": "The Port of the database.", + "type": "integer", + "minimum": 0, + "maximum": 
65536, + "default": 3306, + "examples": ["3306"], + "order": 1 + }, + "database": { + "title": "Database", + "description": "Name of the database.", + "type": "string", + "order": 2 + }, + "username": { + "title": "Username", + "description": "The Username which is used to access the database.", + "type": "string", + "order": 4 + }, + "schema": { + "title": "Default Schema", + "description": "The default schema tables are written to if the source does not specify a namespace. The usual value for this field is \"public\".", + "type": "string", + "examples": ["public"], + "default": "public", + "order": 3 + }, + "password": { + "title": "Password", + "description": "The Password associated with the username.", + "type": "string", + "airbyte_secret": true, + "order": 5 + }, + "jdbc_url_params": { + "description": "Additional properties to pass to the JDBC URL string when connecting to the database formatted as 'key=value' pairs separated by the symbol '&'. (example: key1=value1&key2=value2&key3=value3).", + "title": "JDBC URL Params", + "type": "string", + "order": 6 + } + } + } +} diff --git a/airbyte-integrations/connectors/destination-yugabytedb/src/test-integration/java/io/airbyte/integrations/destination/yugabytedb/YugabyteDataSource.java b/airbyte-integrations/connectors/destination-yugabytedb/src/test-integration/java/io/airbyte/integrations/destination/yugabytedb/YugabyteDataSource.java new file mode 100644 index 0000000..f7cea14 --- /dev/null +++ b/airbyte-integrations/connectors/destination-yugabytedb/src/test-integration/java/io/airbyte/integrations/destination/yugabytedb/YugabyteDataSource.java @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.yugabytedb; + +import io.airbyte.cdk.db.factory.DataSourceFactory; +import io.airbyte.cdk.db.factory.DatabaseDriver; +import java.util.Collections; +import javax.sql.DataSource; + +public class YugabyteDataSource { + + private YugabyteDataSource() { + + } + + static DataSource getInstance(String host, int port, String database, String username, String password) { + String jdbcUrl = "jdbc:yugabytedb://" + host + ":" + port + "/" + database; + return DataSourceFactory.create( + username, + password, + DatabaseDriver.YUGABYTEDB.getDriverClassName(), + jdbcUrl, + Collections.emptyMap()); + } + +} diff --git a/airbyte-integrations/connectors/destination-yugabytedb/src/test-integration/java/io/airbyte/integrations/destination/yugabytedb/YugabytedbContainerInitializr.java b/airbyte-integrations/connectors/destination-yugabytedb/src/test-integration/java/io/airbyte/integrations/destination/yugabytedb/YugabytedbContainerInitializr.java new file mode 100644 index 0000000..fe81ca9 --- /dev/null +++ b/airbyte-integrations/connectors/destination-yugabytedb/src/test-integration/java/io/airbyte/integrations/destination/yugabytedb/YugabytedbContainerInitializr.java @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.yugabytedb; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.testcontainers.containers.JdbcDatabaseContainer; +import org.testcontainers.utility.DockerImageName; + +public class YugabytedbContainerInitializr { + + private static final Logger LOGGER = LoggerFactory.getLogger(YugabytedbContainerInitializr.class); + + private static YugabytedbContainer yugabytedbContainer; + + private YugabytedbContainerInitializr() { + + } + + public static YugabytedbContainer initContainer() { + if (yugabytedbContainer == null) { + yugabytedbContainer = new YugabytedbContainer(); + } + yugabytedbContainer.start(); + return yugabytedbContainer; + } + + static class YugabytedbContainer extends JdbcDatabaseContainer { + + private static final int YUGABYTE_PORT = 5433; + + public YugabytedbContainer() { + super(DockerImageName.parse("yugabytedb/yugabyte:2.15.2.0-b87")); + + this.setCommand("bin/yugabyted", "start", "--daemon=false"); + this.addExposedPort(YUGABYTE_PORT); + + } + + @Override + public String getDriverClassName() { + return "com.yugabyte.Driver"; + } + + @Override + public String getJdbcUrl() { + String params = constructUrlParameters("?", "&"); + return "jdbc:yugabytedb://" + getHost() + ":" + getMappedPort(YUGABYTE_PORT) + "/yugabyte" + params; + } + + @Override + public String getDatabaseName() { + return "yugabyte"; + } + + @Override + public String getUsername() { + return "yugabyte"; + } + + @Override + public String getPassword() { + return "yugabyte"; + } + + @Override + protected String getTestQueryString() { + return "SELECT 1"; + } + + } + +} diff --git a/airbyte-integrations/connectors/destination-yugabytedb/src/test-integration/java/io/airbyte/integrations/destination/yugabytedb/YugabytedbDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-yugabytedb/src/test-integration/java/io/airbyte/integrations/destination/yugabytedb/YugabytedbDestinationAcceptanceTest.java new file mode 100644 index 0000000..ef4e3b8 --- /dev/null +++ b/airbyte-integrations/connectors/destination-yugabytedb/src/test-integration/java/io/airbyte/integrations/destination/yugabytedb/YugabytedbDestinationAcceptanceTest.java @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+ */ + +package io.airbyte.integrations.destination.yugabytedb; + +import com.fasterxml.jackson.databind.JsonNode; +import com.google.common.collect.ImmutableMap; +import io.airbyte.cdk.db.jdbc.DefaultJdbcDatabase; +import io.airbyte.cdk.db.jdbc.JdbcDatabase; +import io.airbyte.cdk.integrations.base.JavaBaseConstants; +import io.airbyte.cdk.integrations.destination.StandardNameTransformer; +import io.airbyte.cdk.integrations.standardtest.destination.JdbcDestinationAcceptanceTest; +import io.airbyte.cdk.integrations.standardtest.destination.comparator.AdvancedTestDataComparator; +import io.airbyte.cdk.integrations.standardtest.destination.comparator.TestDataComparator; +import io.airbyte.commons.json.Jsons; +import java.sql.SQLException; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.TestInstance; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +@TestInstance(TestInstance.Lifecycle.PER_CLASS) +public class YugabytedbDestinationAcceptanceTest extends JdbcDestinationAcceptanceTest { + + private static final Logger LOGGER = LoggerFactory.getLogger(YugabytedbDestinationAcceptanceTest.class); + + private YugabytedbContainerInitializr.YugabytedbContainer yugabytedbContainer; + + private final StandardNameTransformer namingResolver = new StandardNameTransformer(); + + private JsonNode jsonConfig; + + private JdbcDatabase database; + + private static final Set cleanupTables = new HashSet<>(); + + @BeforeAll + void initContainer() { + yugabytedbContainer = YugabytedbContainerInitializr.initContainer(); + } + + @Override + protected String getImageName() { + return "airbyte/destination-yugabytedb:dev"; + } + + @Override + protected void setup(final TestDestinationEnv testEnv, final HashSet TEST_SCHEMAS) throws Exception { + jsonConfig = Jsons.jsonNode(ImmutableMap.builder() + .put("host", yugabytedbContainer.getHost()) + .put("port", yugabytedbContainer.getMappedPort(5433)) + .put("database", yugabytedbContainer.getDatabaseName()) + .put("username", yugabytedbContainer.getUsername()) + .put("password", yugabytedbContainer.getPassword()) + .put("schema", "public") + .build()); + + database = new DefaultJdbcDatabase(YugabyteDataSource.getInstance( + yugabytedbContainer.getHost(), + yugabytedbContainer.getMappedPort(5433), + yugabytedbContainer.getDatabaseName(), + yugabytedbContainer.getUsername(), + yugabytedbContainer.getPassword())); + + } + + @Override + protected void tearDown(final TestDestinationEnv testEnv) throws Exception { + database.execute(connection -> { + final var statement = connection.createStatement(); + cleanupTables.forEach(tb -> { + try { + statement.execute("DROP TABLE " + tb + ";"); + } catch (final SQLException e) { + throw new RuntimeException(e); + } + }); + }); + cleanupTables.clear(); + } + + @Override + protected JsonNode getConfig() { + return jsonConfig; + } + + @Override + protected JsonNode getFailCheckConfig() { + return Jsons.jsonNode(ImmutableMap.builder() + .put("host", yugabytedbContainer.getHost()) + .put("port", yugabytedbContainer.getMappedPort(5433)) + .put("database", yugabytedbContainer.getDatabaseName()) + .put("username", "usr") + .put("password", "pw") + .put("schema", "public") + .build()); + } + + @Override + protected boolean implementsNamespaces() { + return true; + } + + @Override + protected TestDataComparator getTestDataComparator() { + return new AdvancedTestDataComparator(); + } + + @Override + protected boolean 
supportBasicDataTypeTest() { + return true; + } + + @Override + protected boolean supportArrayDataTypeTest() { + return true; + } + + @Override + protected boolean supportObjectDataTypeTest() { + return true; + } + + @Override + protected List retrieveRecords(final TestDestinationEnv testEnv, + final String streamName, + final String namespace, + final JsonNode streamSchema) + throws SQLException { + + final String tableName = namingResolver.getRawTableName(streamName); + final String schemaName = namingResolver.getNamespace(namespace); + cleanupTables.add(schemaName + "." + tableName); + return retrieveRecordsFromTable(tableName, schemaName); + } + + private List retrieveRecordsFromTable(final String tableName, final String schemaName) + throws SQLException { + + return database.bufferedResultSetQuery( + connection -> { + final var statement = connection.createStatement(); + return statement.executeQuery( + String.format("SELECT * FROM %s.%s ORDER BY %s ASC;", schemaName, tableName, + JavaBaseConstants.COLUMN_NAME_EMITTED_AT)); + }, + rs -> Jsons.deserialize(rs.getString(JavaBaseConstants.COLUMN_NAME_DATA))); + } + +} diff --git a/airbyte-integrations/connectors/destination-yugabytedb/src/test/java/io/airbyte/integrations/destination/yugabytedb/YugabytedbDestinationTest.java b/airbyte-integrations/connectors/destination-yugabytedb/src/test/java/io/airbyte/integrations/destination/yugabytedb/YugabytedbDestinationTest.java new file mode 100644 index 0000000..3086383 --- /dev/null +++ b/airbyte-integrations/connectors/destination-yugabytedb/src/test/java/io/airbyte/integrations/destination/yugabytedb/YugabytedbDestinationTest.java @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.yugabytedb; + +import static org.assertj.core.api.Assertions.assertThat; + +import com.google.common.collect.ImmutableMap; +import io.airbyte.commons.json.Jsons; +import java.util.Collections; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +class YugabytedbDestinationTest { + + private YugabytedbDestination yugabytedbDestination; + + @BeforeEach + void setup() { + yugabytedbDestination = new YugabytedbDestination(); + } + + @Test + void testToJdbcConfig() { + + var config = Jsons.jsonNode(ImmutableMap.builder() + .put("host", "localhost") + .put("port", 5433) + .put("database", "yugabyte") + .put("username", "yugabyte") + .put("password", "yugabyte") + .put("schema", "public") + .build()); + + var jdbcConfig = yugabytedbDestination.toJdbcConfig(config); + + assertThat(jdbcConfig.get("schema").asText()).isEqualTo("public"); + assertThat(jdbcConfig.get("username").asText()).isEqualTo("yugabyte"); + assertThat(jdbcConfig.get("password").asText()).isEqualTo("yugabyte"); + assertThat(jdbcConfig.get("jdbc_url").asText()).isEqualTo("jdbc:yugabytedb://localhost:5433/yugabyte"); + + } + + @Test + void testGetDefaultConnectionProperties() { + + var map = yugabytedbDestination.getDefaultConnectionProperties(Jsons.jsonNode(Collections.emptyMap())); + + assertThat(map).isEmpty(); + + } + +} diff --git a/airbyte-integrations/connectors/destination-yugabytedb/src/test/java/io/airbyte/integrations/destination/yugabytedb/YugabytedbNamingTransformerTest.java b/airbyte-integrations/connectors/destination-yugabytedb/src/test/java/io/airbyte/integrations/destination/yugabytedb/YugabytedbNamingTransformerTest.java new file mode 100644 index 0000000..a05a8a1 --- /dev/null +++ 
b/airbyte-integrations/connectors/destination-yugabytedb/src/test/java/io/airbyte/integrations/destination/yugabytedb/YugabytedbNamingTransformerTest.java @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2023 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.integrations.destination.yugabytedb; + +import static org.assertj.core.api.Assertions.assertThat; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +class YugabytedbNamingTransformerTest { + + private YugabytedbNamingTransformer yugabytedbNamingTransformer; + + @BeforeEach + void setup() { + yugabytedbNamingTransformer = new YugabytedbNamingTransformer(); + } + + @Test + void testApplyDefaultCase() { + + var defaultCase = yugabytedbNamingTransformer.applyDefaultCase("DEFAULT_CASE"); + + assertThat(defaultCase).isEqualTo("default_case"); + + } + +}