From d2cebef137719ae5065a3008f0a3bf5d646ccf10 Mon Sep 17 00:00:00 2001 From: maxi297 Date: Tue, 11 Mar 2025 10:56:16 -0400 Subject: [PATCH 1/3] Up CDK to fix schema type issue --- .../connectors/source-s3/poetry.lock | 206 ++++++++++-------- .../connectors/source-s3/pyproject.toml | 2 +- .../source-s3/source_s3/v4/stream_reader.py | 16 +- 3 files changed, 126 insertions(+), 98 deletions(-) diff --git a/airbyte-integrations/connectors/source-s3/poetry.lock b/airbyte-integrations/connectors/source-s3/poetry.lock index 5f58144920acf..d19a58e3ae3e1 100644 --- a/airbyte-integrations/connectors/source-s3/poetry.lock +++ b/airbyte-integrations/connectors/source-s3/poetry.lock @@ -2,23 +2,24 @@ [[package]] name = "airbyte-cdk" -version = "6.33.4" +version = "6.38.5.dev0" description = "A framework for writing Airbyte Connectors." optional = false python-versions = "<3.13,>=3.10" groups = ["main"] markers = "python_version <= \"3.11\"" files = [ - {file = "airbyte_cdk-6.33.4-py3-none-any.whl", hash = "sha256:1df057563078744220832922e7ae0b465b85f1de43aa955caa45377c66ffe072"}, - {file = "airbyte_cdk-6.33.4.tar.gz", hash = "sha256:8e93400b72e7492c45ad1962533d5f65df66ed0d89f627e1cb0bad86ee354562"}, + {file = "airbyte_cdk-6.38.5.dev0-py3-none-any.whl", hash = "sha256:23abf29250fcc9848ffd925afefb2fc1d070a685177485d5bf5e2fc1dd5486a8"}, + {file = "airbyte_cdk-6.38.5.dev0.tar.gz", hash = "sha256:9ec0b0e841c667d23190f76146d0a9f8571367d5b1e308d8d0e0d64ac15c9f46"}, ] [package.dependencies] airbyte-protocol-models-dataclasses = ">=0.14,<0.15" +anyascii = ">=0.3.2,<0.4.0" avro = {version = ">=1.11.2,<1.13.0", optional = true, markers = "extra == \"file-based\""} backoff = "*" cachetools = "*" -cryptography = ">=42.0.5,<44.0.0" +cryptography = ">=44.0.0,<45.0.0" dpath = ">=2.1.6,<3.0.0" dunamai = ">=1.22.0,<2.0.0" fastavro = {version = ">=1.8.0,<1.9.0", optional = true, markers = "extra == \"file-based\""} @@ -36,7 +37,7 @@ pandas = "2.2.2" pdf2image = {version = "1.16.3", optional = 
true, markers = "extra == \"file-based\""} "pdfminer.six" = {version = "20221105", optional = true, markers = "extra == \"file-based\""} psutil = "6.1.0" -pyarrow = {version = ">=15.0.0,<15.1.0", optional = true, markers = "extra == \"file-based\""} +pyarrow = {version = ">=19.0.0,<20.0.0", optional = true, markers = "extra == \"file-based\""} pydantic = ">=2.7,<3.0" pyjwt = ">=2.8.0,<3.0.0" pyrate-limiter = ">=3.1.0,<3.2.0" @@ -51,7 +52,6 @@ rapidfuzz = ">=3.10.1,<4.0.0" requests = "*" requests_cache = "*" serpyco-rs = ">=1.10.2,<2.0.0" -Unidecode = ">=1.3,<2.0" unstructured = {version = "0.10.27", extras = ["docx", "pptx"], optional = true, markers = "extra == \"file-based\""} "unstructured.pytesseract" = {version = ">=0.3.12", optional = true, markers = "extra == \"file-based\""} wcmatch = "10.0" @@ -59,7 +59,7 @@ whenever = ">=0.6.16,<0.7.0" xmltodict = ">=0.13,<0.15" [package.extras] -file-based = ["avro (>=1.11.2,<1.13.0)", "fastavro (>=1.8.0,<1.9.0)", "markdown", "pdf2image (==1.16.3)", "pdfminer.six (==20221105)", "pyarrow (>=15.0.0,<15.1.0)", "pytesseract (==0.3.10)", "python-calamine (==0.2.3)", "python-snappy (==0.7.3)", "unstructured.pytesseract (>=0.3.12)", "unstructured[docx,pptx] (==0.10.27)"] +file-based = ["avro (>=1.11.2,<1.13.0)", "fastavro (>=1.8.0,<1.9.0)", "markdown", "pdf2image (==1.16.3)", "pdfminer.six (==20221105)", "pyarrow (>=19.0.0,<20.0.0)", "pytesseract (==0.3.10)", "python-calamine (==0.2.3)", "python-snappy (==0.7.3)", "unstructured.pytesseract (>=0.3.12)", "unstructured[docx,pptx] (==0.10.27)"] sql = ["sqlalchemy (>=2.0,!=2.0.36,<3.0)"] vector-db-based = ["cohere (==4.21)", "langchain (==0.1.16)", "openai[embeddings] (==0.27.9)", "tiktoken (==0.8.0)"] @@ -89,6 +89,19 @@ files = [ {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"}, ] +[[package]] +name = "anyascii" +version = "0.3.2" +description = "Unicode to ASCII transliteration" +optional = false 
+python-versions = ">=3.3" +groups = ["main"] +markers = "python_version <= \"3.11\"" +files = [ + {file = "anyascii-0.3.2-py3-none-any.whl", hash = "sha256:3b3beef6fc43d9036d3b0529050b0c48bfad8bc960e9e562d7223cfb94fe45d4"}, + {file = "anyascii-0.3.2.tar.gz", hash = "sha256:9d5d32ef844fe225b8bc7cba7f950534fae4da27a9bf3a6bea2cb0ea46ce4730"}, +] + [[package]] name = "anyio" version = "4.8.0" @@ -641,53 +654,61 @@ dev = ["black (==22.3.0)", "hypothesis", "numpy", "pytest (>=5.30)", "pytest-ben [[package]] name = "cryptography" -version = "43.0.3" +version = "44.0.2" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." optional = false -python-versions = ">=3.7" +python-versions = "!=3.9.0,!=3.9.1,>=3.7" groups = ["main", "dev"] markers = "python_version <= \"3.11\"" files = [ - {file = "cryptography-43.0.3-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:bf7a1932ac4176486eab36a19ed4c0492da5d97123f1406cf15e41b05e787d2e"}, - {file = "cryptography-43.0.3-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:63efa177ff54aec6e1c0aefaa1a241232dcd37413835a9b674b6e3f0ae2bfd3e"}, - {file = "cryptography-43.0.3-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e1ce50266f4f70bf41a2c6dc4358afadae90e2a1e5342d3c08883df1675374f"}, - {file = "cryptography-43.0.3-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:443c4a81bb10daed9a8f334365fe52542771f25aedaf889fd323a853ce7377d6"}, - {file = "cryptography-43.0.3-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:74f57f24754fe349223792466a709f8e0c093205ff0dca557af51072ff47ab18"}, - {file = "cryptography-43.0.3-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:9762ea51a8fc2a88b70cf2995e5675b38d93bf36bd67d91721c309df184f49bd"}, - {file = "cryptography-43.0.3-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:81ef806b1fef6b06dcebad789f988d3b37ccaee225695cf3e07648eee0fc6b73"}, - {file = 
"cryptography-43.0.3-cp37-abi3-win32.whl", hash = "sha256:cbeb489927bd7af4aa98d4b261af9a5bc025bd87f0e3547e11584be9e9427be2"}, - {file = "cryptography-43.0.3-cp37-abi3-win_amd64.whl", hash = "sha256:f46304d6f0c6ab8e52770addfa2fc41e6629495548862279641972b6215451cd"}, - {file = "cryptography-43.0.3-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:8ac43ae87929a5982f5948ceda07001ee5e83227fd69cf55b109144938d96984"}, - {file = "cryptography-43.0.3-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:846da004a5804145a5f441b8530b4bf35afbf7da70f82409f151695b127213d5"}, - {file = "cryptography-43.0.3-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f996e7268af62598f2fc1204afa98a3b5712313a55c4c9d434aef49cadc91d4"}, - {file = "cryptography-43.0.3-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:f7b178f11ed3664fd0e995a47ed2b5ff0a12d893e41dd0494f406d1cf555cab7"}, - {file = "cryptography-43.0.3-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:c2e6fc39c4ab499049df3bdf567f768a723a5e8464816e8f009f121a5a9f4405"}, - {file = "cryptography-43.0.3-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:e1be4655c7ef6e1bbe6b5d0403526601323420bcf414598955968c9ef3eb7d16"}, - {file = "cryptography-43.0.3-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:df6b6c6d742395dd77a23ea3728ab62f98379eff8fb61be2744d4679ab678f73"}, - {file = "cryptography-43.0.3-cp39-abi3-win32.whl", hash = "sha256:d56e96520b1020449bbace2b78b603442e7e378a9b3bd68de65c782db1507995"}, - {file = "cryptography-43.0.3-cp39-abi3-win_amd64.whl", hash = "sha256:0c580952eef9bf68c4747774cde7ec1d85a6e61de97281f2dba83c7d2c806362"}, - {file = "cryptography-43.0.3-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:d03b5621a135bffecad2c73e9f4deb1a0f977b9a8ffe6f8e002bf6c9d07b918c"}, - {file = "cryptography-43.0.3-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:a2a431ee15799d6db9fe80c82b055bae5a752bef645bba795e8e52687c69efe3"}, - {file = 
"cryptography-43.0.3-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:281c945d0e28c92ca5e5930664c1cefd85efe80e5c0d2bc58dd63383fda29f83"}, - {file = "cryptography-43.0.3-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:f18c716be16bc1fea8e95def49edf46b82fccaa88587a45f8dc0ff6ab5d8e0a7"}, - {file = "cryptography-43.0.3-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:4a02ded6cd4f0a5562a8887df8b3bd14e822a90f97ac5e544c162899bc467664"}, - {file = "cryptography-43.0.3-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:53a583b6637ab4c4e3591a15bc9db855b8d9dee9a669b550f311480acab6eb08"}, - {file = "cryptography-43.0.3-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:1ec0bcf7e17c0c5669d881b1cd38c4972fade441b27bda1051665faaa89bdcaa"}, - {file = "cryptography-43.0.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:2ce6fae5bdad59577b44e4dfed356944fbf1d925269114c28be377692643b4ff"}, - {file = "cryptography-43.0.3.tar.gz", hash = "sha256:315b9001266a492a6ff443b61238f956b214dbec9910a081ba5b6646a055a805"}, + {file = "cryptography-44.0.2-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:efcfe97d1b3c79e486554efddeb8f6f53a4cdd4cf6086642784fa31fc384e1d7"}, + {file = "cryptography-44.0.2-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:29ecec49f3ba3f3849362854b7253a9f59799e3763b0c9d0826259a88efa02f1"}, + {file = "cryptography-44.0.2-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc821e161ae88bfe8088d11bb39caf2916562e0a2dc7b6d56714a48b784ef0bb"}, + {file = "cryptography-44.0.2-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:3c00b6b757b32ce0f62c574b78b939afab9eecaf597c4d624caca4f9e71e7843"}, + {file = "cryptography-44.0.2-cp37-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:7bdcd82189759aba3816d1f729ce42ffded1ac304c151d0a8e89b9996ab863d5"}, + {file = "cryptography-44.0.2-cp37-abi3-manylinux_2_28_x86_64.whl", hash = 
"sha256:4973da6ca3db4405c54cd0b26d328be54c7747e89e284fcff166132eb7bccc9c"}, + {file = "cryptography-44.0.2-cp37-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:4e389622b6927d8133f314949a9812972711a111d577a5d1f4bee5e58736b80a"}, + {file = "cryptography-44.0.2-cp37-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:f514ef4cd14bb6fb484b4a60203e912cfcb64f2ab139e88c2274511514bf7308"}, + {file = "cryptography-44.0.2-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:1bc312dfb7a6e5d66082c87c34c8a62176e684b6fe3d90fcfe1568de675e6688"}, + {file = "cryptography-44.0.2-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:3b721b8b4d948b218c88cb8c45a01793483821e709afe5f622861fc6182b20a7"}, + {file = "cryptography-44.0.2-cp37-abi3-win32.whl", hash = "sha256:51e4de3af4ec3899d6d178a8c005226491c27c4ba84101bfb59c901e10ca9f79"}, + {file = "cryptography-44.0.2-cp37-abi3-win_amd64.whl", hash = "sha256:c505d61b6176aaf982c5717ce04e87da5abc9a36a5b39ac03905c4aafe8de7aa"}, + {file = "cryptography-44.0.2-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:8e0ddd63e6bf1161800592c71ac794d3fb8001f2caebe0966e77c5234fa9efc3"}, + {file = "cryptography-44.0.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:81276f0ea79a208d961c433a947029e1a15948966658cf6710bbabb60fcc2639"}, + {file = "cryptography-44.0.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a1e657c0f4ea2a23304ee3f964db058c9e9e635cc7019c4aa21c330755ef6fd"}, + {file = "cryptography-44.0.2-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:6210c05941994290f3f7f175a4a57dbbb2afd9273657614c506d5976db061181"}, + {file = "cryptography-44.0.2-cp39-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:d1c3572526997b36f245a96a2b1713bf79ce99b271bbcf084beb6b9b075f29ea"}, + {file = "cryptography-44.0.2-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:b042d2a275c8cee83a4b7ae30c45a15e6a4baa65a179a0ec2d78ebb90e4f6699"}, + {file = 
"cryptography-44.0.2-cp39-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:d03806036b4f89e3b13b6218fefea8d5312e450935b1a2d55f0524e2ed7c59d9"}, + {file = "cryptography-44.0.2-cp39-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:c7362add18b416b69d58c910caa217f980c5ef39b23a38a0880dfd87bdf8cd23"}, + {file = "cryptography-44.0.2-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:8cadc6e3b5a1f144a039ea08a0bdb03a2a92e19c46be3285123d32029f40a922"}, + {file = "cryptography-44.0.2-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:6f101b1f780f7fc613d040ca4bdf835c6ef3b00e9bd7125a4255ec574c7916e4"}, + {file = "cryptography-44.0.2-cp39-abi3-win32.whl", hash = "sha256:3dc62975e31617badc19a906481deacdeb80b4bb454394b4098e3f2525a488c5"}, + {file = "cryptography-44.0.2-cp39-abi3-win_amd64.whl", hash = "sha256:5f6f90b72d8ccadb9c6e311c775c8305381db88374c65fa1a68250aa8a9cb3a6"}, + {file = "cryptography-44.0.2-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:af4ff3e388f2fa7bff9f7f2b31b87d5651c45731d3e8cfa0944be43dff5cfbdb"}, + {file = "cryptography-44.0.2-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:0529b1d5a0105dd3731fa65680b45ce49da4d8115ea76e9da77a875396727b41"}, + {file = "cryptography-44.0.2-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:7ca25849404be2f8e4b3c59483d9d3c51298a22c1c61a0e84415104dacaf5562"}, + {file = "cryptography-44.0.2-pp310-pypy310_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:268e4e9b177c76d569e8a145a6939eca9a5fec658c932348598818acf31ae9a5"}, + {file = "cryptography-44.0.2-pp310-pypy310_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:9eb9d22b0a5d8fd9925a7764a054dca914000607dff201a24c791ff5c799e1fa"}, + {file = "cryptography-44.0.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:2bf7bf75f7df9715f810d1b038870309342bff3069c5bd8c6b96128cb158668d"}, + {file = "cryptography-44.0.2-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:909c97ab43a9c0c0b0ada7a1281430e4e5ec0458e6d9244c0e821bbf152f061d"}, + {file = 
"cryptography-44.0.2-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:96e7a5e9d6e71f9f4fca8eebfd603f8e86c5225bb18eb621b2c1e50b290a9471"}, + {file = "cryptography-44.0.2-pp311-pypy311_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:d1b3031093a366ac767b3feb8bcddb596671b3aaff82d4050f984da0c248b615"}, + {file = "cryptography-44.0.2-pp311-pypy311_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:04abd71114848aa25edb28e225ab5f268096f44cf0127f3d36975bdf1bdf3390"}, + {file = "cryptography-44.0.2.tar.gz", hash = "sha256:c63454aa261a0cf0c5b4718349629793e9e634993538db841165b3df74f37ec0"}, ] [package.dependencies] cffi = {version = ">=1.12", markers = "platform_python_implementation != \"PyPy\""} [package.extras] -docs = ["sphinx (>=5.3.0)", "sphinx-rtd-theme (>=1.1.1)"] -docstest = ["pyenchant (>=1.6.11)", "readme-renderer", "sphinxcontrib-spelling (>=4.0.1)"] -nox = ["nox"] -pep8test = ["check-sdist", "click", "mypy", "ruff"] -sdist = ["build"] +docs = ["sphinx (>=5.3.0)", "sphinx-rtd-theme (>=3.0.0)"] +docstest = ["pyenchant (>=3)", "readme-renderer (>=30.0)", "sphinxcontrib-spelling (>=7.3.1)"] +nox = ["nox (>=2024.4.15)", "nox[uv] (>=2024.3.2)"] +pep8test = ["check-sdist", "click (>=8.0.1)", "mypy (>=1.4)", "ruff (>=0.3.6)"] +sdist = ["build (>=1.0.0)"] ssh = ["bcrypt (>=3.1.5)"] -test = ["certifi", "cryptography-vectors (==43.0.3)", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"] +test = ["certifi (>=2024)", "cryptography-vectors (==44.0.2)", "pretend (>=0.7)", "pytest (>=7.4.0)", "pytest-benchmark (>=4.0)", "pytest-cov (>=2.10.1)", "pytest-xdist (>=3.5.0)"] test-randomorder = ["pytest-randomly"] [[package]] @@ -2129,53 +2150,59 @@ test = ["pytest", "pytest-xdist", "setuptools"] [[package]] name = "pyarrow" -version = "15.0.2" +version = "19.0.1" description = "Python library for Apache Arrow" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" groups = ["main"] markers = "python_version <= \"3.11\"" files = 
[ - {file = "pyarrow-15.0.2-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:88b340f0a1d05b5ccc3d2d986279045655b1fe8e41aba6ca44ea28da0d1455d8"}, - {file = "pyarrow-15.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:eaa8f96cecf32da508e6c7f69bb8401f03745c050c1dd42ec2596f2e98deecac"}, - {file = "pyarrow-15.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:23c6753ed4f6adb8461e7c383e418391b8d8453c5d67e17f416c3a5d5709afbd"}, - {file = "pyarrow-15.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f639c059035011db8c0497e541a8a45d98a58dbe34dc8fadd0ef128f2cee46e5"}, - {file = "pyarrow-15.0.2-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:290e36a59a0993e9a5224ed2fb3e53375770f07379a0ea03ee2fce2e6d30b423"}, - {file = "pyarrow-15.0.2-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:06c2bb2a98bc792f040bef31ad3e9be6a63d0cb39189227c08a7d955db96816e"}, - {file = "pyarrow-15.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:f7a197f3670606a960ddc12adbe8075cea5f707ad7bf0dffa09637fdbb89f76c"}, - {file = "pyarrow-15.0.2-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:5f8bc839ea36b1f99984c78e06e7a06054693dc2af8920f6fb416b5bca9944e4"}, - {file = "pyarrow-15.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f5e81dfb4e519baa6b4c80410421528c214427e77ca0ea9461eb4097c328fa33"}, - {file = "pyarrow-15.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3a4f240852b302a7af4646c8bfe9950c4691a419847001178662a98915fd7ee7"}, - {file = "pyarrow-15.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e7d9cfb5a1e648e172428c7a42b744610956f3b70f524aa3a6c02a448ba853e"}, - {file = "pyarrow-15.0.2-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:2d4f905209de70c0eb5b2de6763104d5a9a37430f137678edfb9a675bac9cd98"}, - {file = "pyarrow-15.0.2-cp311-cp311-manylinux_2_28_x86_64.whl", hash = 
"sha256:90adb99e8ce5f36fbecbbc422e7dcbcbed07d985eed6062e459e23f9e71fd197"}, - {file = "pyarrow-15.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:b116e7fd7889294cbd24eb90cd9bdd3850be3738d61297855a71ac3b8124ee38"}, - {file = "pyarrow-15.0.2-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:25335e6f1f07fdaa026a61c758ee7d19ce824a866b27bba744348fa73bb5a440"}, - {file = "pyarrow-15.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:90f19e976d9c3d8e73c80be84ddbe2f830b6304e4c576349d9360e335cd627fc"}, - {file = "pyarrow-15.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a22366249bf5fd40ddacc4f03cd3160f2d7c247692945afb1899bab8a140ddfb"}, - {file = "pyarrow-15.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c2a335198f886b07e4b5ea16d08ee06557e07db54a8400cc0d03c7f6a22f785f"}, - {file = "pyarrow-15.0.2-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:3e6d459c0c22f0b9c810a3917a1de3ee704b021a5fb8b3bacf968eece6df098f"}, - {file = "pyarrow-15.0.2-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:033b7cad32198754d93465dcfb71d0ba7cb7cd5c9afd7052cab7214676eec38b"}, - {file = "pyarrow-15.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:29850d050379d6e8b5a693098f4de7fd6a2bea4365bfd073d7c57c57b95041ee"}, - {file = "pyarrow-15.0.2-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:7167107d7fb6dcadb375b4b691b7e316f4368f39f6f45405a05535d7ad5e5058"}, - {file = "pyarrow-15.0.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e85241b44cc3d365ef950432a1b3bd44ac54626f37b2e3a0cc89c20e45dfd8bf"}, - {file = "pyarrow-15.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:248723e4ed3255fcd73edcecc209744d58a9ca852e4cf3d2577811b6d4b59818"}, - {file = "pyarrow-15.0.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ff3bdfe6f1b81ca5b73b70a8d482d37a766433823e0c21e22d1d7dde76ca33f"}, - {file = "pyarrow-15.0.2-cp38-cp38-manylinux_2_28_aarch64.whl", hash = 
"sha256:f3d77463dee7e9f284ef42d341689b459a63ff2e75cee2b9302058d0d98fe142"}, - {file = "pyarrow-15.0.2-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:8c1faf2482fb89766e79745670cbca04e7018497d85be9242d5350cba21357e1"}, - {file = "pyarrow-15.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:28f3016958a8e45a1069303a4a4f6a7d4910643fc08adb1e2e4a7ff056272ad3"}, - {file = "pyarrow-15.0.2-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:89722cb64286ab3d4daf168386f6968c126057b8c7ec3ef96302e81d8cdb8ae4"}, - {file = "pyarrow-15.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:cd0ba387705044b3ac77b1b317165c0498299b08261d8122c96051024f953cd5"}, - {file = "pyarrow-15.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ad2459bf1f22b6a5cdcc27ebfd99307d5526b62d217b984b9f5c974651398832"}, - {file = "pyarrow-15.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58922e4bfece8b02abf7159f1f53a8f4d9f8e08f2d988109126c17c3bb261f22"}, - {file = "pyarrow-15.0.2-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:adccc81d3dc0478ea0b498807b39a8d41628fa9210729b2f718b78cb997c7c91"}, - {file = "pyarrow-15.0.2-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:8bd2baa5fe531571847983f36a30ddbf65261ef23e496862ece83bdceb70420d"}, - {file = "pyarrow-15.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:6669799a1d4ca9da9c7e06ef48368320f5856f36f9a4dd31a11839dda3f6cc8c"}, - {file = "pyarrow-15.0.2.tar.gz", hash = "sha256:9c9bc803cb3b7bfacc1e96ffbfd923601065d9d3f911179d81e72d99fd74a3d9"}, + {file = "pyarrow-19.0.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:fc28912a2dc924dddc2087679cc8b7263accc71b9ff025a1362b004711661a69"}, + {file = "pyarrow-19.0.1-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:fca15aabbe9b8355800d923cc2e82c8ef514af321e18b437c3d782aa884eaeec"}, + {file = "pyarrow-19.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ad76aef7f5f7e4a757fddcdcf010a8290958f09e3470ea458c80d26f4316ae89"}, + 
{file = "pyarrow-19.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d03c9d6f2a3dffbd62671ca070f13fc527bb1867b4ec2b98c7eeed381d4f389a"}, + {file = "pyarrow-19.0.1-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:65cf9feebab489b19cdfcfe4aa82f62147218558d8d3f0fc1e9dea0ab8e7905a"}, + {file = "pyarrow-19.0.1-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:41f9706fbe505e0abc10e84bf3a906a1338905cbbcf1177b71486b03e6ea6608"}, + {file = "pyarrow-19.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:c6cb2335a411b713fdf1e82a752162f72d4a7b5dbc588e32aa18383318b05866"}, + {file = "pyarrow-19.0.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:cc55d71898ea30dc95900297d191377caba257612f384207fe9f8293b5850f90"}, + {file = "pyarrow-19.0.1-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:7a544ec12de66769612b2d6988c36adc96fb9767ecc8ee0a4d270b10b1c51e00"}, + {file = "pyarrow-19.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0148bb4fc158bfbc3d6dfe5001d93ebeed253793fff4435167f6ce1dc4bddeae"}, + {file = "pyarrow-19.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f24faab6ed18f216a37870d8c5623f9c044566d75ec586ef884e13a02a9d62c5"}, + {file = "pyarrow-19.0.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:4982f8e2b7afd6dae8608d70ba5bd91699077323f812a0448d8b7abdff6cb5d3"}, + {file = "pyarrow-19.0.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:49a3aecb62c1be1d822f8bf629226d4a96418228a42f5b40835c1f10d42e4db6"}, + {file = "pyarrow-19.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:008a4009efdb4ea3d2e18f05cd31f9d43c388aad29c636112c2966605ba33466"}, + {file = "pyarrow-19.0.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:80b2ad2b193e7d19e81008a96e313fbd53157945c7be9ac65f44f8937a55427b"}, + {file = "pyarrow-19.0.1-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:ee8dec072569f43835932a3b10c55973593abc00936c202707a4ad06af7cb294"}, + {file = 
"pyarrow-19.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4d5d1ec7ec5324b98887bdc006f4d2ce534e10e60f7ad995e7875ffa0ff9cb14"}, + {file = "pyarrow-19.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3ad4c0eb4e2a9aeb990af6c09e6fa0b195c8c0e7b272ecc8d4d2b6574809d34"}, + {file = "pyarrow-19.0.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:d383591f3dcbe545f6cc62daaef9c7cdfe0dff0fb9e1c8121101cabe9098cfa6"}, + {file = "pyarrow-19.0.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b4c4156a625f1e35d6c0b2132635a237708944eb41df5fbe7d50f20d20c17832"}, + {file = "pyarrow-19.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:5bd1618ae5e5476b7654c7b55a6364ae87686d4724538c24185bbb2952679960"}, + {file = "pyarrow-19.0.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:e45274b20e524ae5c39d7fc1ca2aa923aab494776d2d4b316b49ec7572ca324c"}, + {file = "pyarrow-19.0.1-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:d9dedeaf19097a143ed6da37f04f4051aba353c95ef507764d344229b2b740ae"}, + {file = "pyarrow-19.0.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ebfb5171bb5f4a52319344ebbbecc731af3f021e49318c74f33d520d31ae0c4"}, + {file = "pyarrow-19.0.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2a21d39fbdb948857f67eacb5bbaaf36802de044ec36fbef7a1c8f0dd3a4ab2"}, + {file = "pyarrow-19.0.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:99bc1bec6d234359743b01e70d4310d0ab240c3d6b0da7e2a93663b0158616f6"}, + {file = "pyarrow-19.0.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:1b93ef2c93e77c442c979b0d596af45e4665d8b96da598db145b0fec014b9136"}, + {file = "pyarrow-19.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:d9d46e06846a41ba906ab25302cf0fd522f81aa2a85a71021826f34639ad31ef"}, + {file = "pyarrow-19.0.1-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:c0fe3dbbf054a00d1f162fda94ce236a899ca01123a798c561ba307ca38af5f0"}, + {file = 
"pyarrow-19.0.1-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:96606c3ba57944d128e8a8399da4812f56c7f61de8c647e3470b417f795d0ef9"}, + {file = "pyarrow-19.0.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f04d49a6b64cf24719c080b3c2029a3a5b16417fd5fd7c4041f94233af732f3"}, + {file = "pyarrow-19.0.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a9137cf7e1640dce4c190551ee69d478f7121b5c6f323553b319cac936395f6"}, + {file = "pyarrow-19.0.1-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:7c1bca1897c28013db5e4c83944a2ab53231f541b9e0c3f4791206d0c0de389a"}, + {file = "pyarrow-19.0.1-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:58d9397b2e273ef76264b45531e9d552d8ec8a6688b7390b5be44c02a37aade8"}, + {file = "pyarrow-19.0.1-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:b9766a47a9cb56fefe95cb27f535038b5a195707a08bf61b180e642324963b46"}, + {file = "pyarrow-19.0.1-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:6c5941c1aac89a6c2f2b16cd64fe76bcdb94b2b1e99ca6459de4e6f07638d755"}, + {file = "pyarrow-19.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fd44d66093a239358d07c42a91eebf5015aa54fccba959db899f932218ac9cc8"}, + {file = "pyarrow-19.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:335d170e050bcc7da867a1ed8ffb8b44c57aaa6e0843b156a501298657b1e972"}, + {file = "pyarrow-19.0.1-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:1c7556165bd38cf0cd992df2636f8bcdd2d4b26916c6b7e646101aff3c16f76f"}, + {file = "pyarrow-19.0.1-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:699799f9c80bebcf1da0983ba86d7f289c5a2a5c04b945e2f2bcf7e874a91911"}, + {file = "pyarrow-19.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:8464c9fbe6d94a7fe1599e7e8965f350fd233532868232ab2596a71586c5a429"}, + {file = "pyarrow-19.0.1.tar.gz", hash = "sha256:3bf266b485df66a400f282ac0b6d1b500b9d2ae73314a153dbe97d6d5cc8a99e"}, ] -[package.dependencies] -numpy = 
">=1.16.6,<2" +[package.extras] +test = ["cffi", "hypothesis", "pandas", "pytest", "pytz"] [[package]] name = "pycparser" @@ -3630,19 +3657,6 @@ files = [ {file = "tzdata-2025.1.tar.gz", hash = "sha256:24894909e88cdb28bd1636c6887801df64cb485bd593f2fd83ef29075a81d694"}, ] -[[package]] -name = "unidecode" -version = "1.3.8" -description = "ASCII transliterations of Unicode text" -optional = false -python-versions = ">=3.5" -groups = ["main"] -markers = "python_version <= \"3.11\"" -files = [ - {file = "Unidecode-1.3.8-py3-none-any.whl", hash = "sha256:d130a61ce6696f8148a3bd8fe779c99adeb4b870584eeb9526584e9aa091fd39"}, - {file = "Unidecode-1.3.8.tar.gz", hash = "sha256:cfdb349d46ed3873ece4586b96aa75258726e2fa8ec21d6f00a591d98806c2f4"}, -] - [[package]] name = "unstructured" version = "0.10.27" @@ -3925,4 +3939,4 @@ files = [ [metadata] lock-version = "2.1" python-versions = "^3.10,<3.12" -content-hash = "8e86aa19aca2e954ebe24d450dd07b778a0c0684a366f56627fd094acdd3d095" +content-hash = "9923a26c22bc9568496dfd9f291ec1a3df6c2089e543a5daf281ff468a18cb39" diff --git a/airbyte-integrations/connectors/source-s3/pyproject.toml b/airbyte-integrations/connectors/source-s3/pyproject.toml index c2a25c89cf36d..ac4980c4f4e66 100644 --- a/airbyte-integrations/connectors/source-s3/pyproject.toml +++ b/airbyte-integrations/connectors/source-s3/pyproject.toml @@ -22,7 +22,7 @@ wcmatch = "==10.0" dill = "==0.3.4" transformers = "4.38.2" urllib3 = "<2" -airbyte-cdk = {extras = ["file-based"], version = "^6.18.2"} +airbyte-cdk = {extras = ["file-based"], version = "6.38.5dev0"} pendulum = "^3.0.0" [tool.poetry.scripts] diff --git a/airbyte-integrations/connectors/source-s3/source_s3/v4/stream_reader.py b/airbyte-integrations/connectors/source-s3/source_s3/v4/stream_reader.py index 9c6051c8dd16f..a445f7d25d012 100644 --- a/airbyte-integrations/connectors/source-s3/source_s3/v4/stream_reader.py +++ b/airbyte-integrations/connectors/source-s3/source_s3/v4/stream_reader.py @@ -7,7 +7,7 @@ 
from datetime import datetime from io import IOBase from os import getenv, makedirs, path -from typing import Dict, Iterable, List, Optional, Set, cast +from typing import Any, Dict, Iterable, List, Optional, Set, cast import boto3.session import pendulum @@ -33,6 +33,20 @@ class SourceS3StreamReader(AbstractFileBasedStreamReader): + def get_file_acl_permissions(self, file: RemoteFile, logger: logging.Logger) -> Dict[str, Any]: + return {} + + def load_identity_groups(self, logger: logging.Logger) -> Iterable[Dict[str, Any]]: + return [] + + @property + def file_permissions_schema(self) -> Dict[str, Any]: + return {} + + @property + def identities_schema(self) -> Dict[str, Any]: + return {} + FILE_SIZE_LIMIT = 1_500_000_000 def __init__(self): From c1772a7efd301620d8abb42db15dde3ab2029aae Mon Sep 17 00:00:00 2001 From: maxi297 Date: Wed, 12 Mar 2025 21:18:05 -0400 Subject: [PATCH 2/3] update release information --- .../connectors/source-s3/metadata.yaml | 2 +- .../connectors/source-s3/poetry.lock | 10 +- .../connectors/source-s3/pyproject.toml | 4 +- .../source-s3/source_s3/v4/stream_reader.py | 14 - docs/integrations/sources/s3.md | 273 +++++++++--------- 5 files changed, 146 insertions(+), 157 deletions(-) diff --git a/airbyte-integrations/connectors/source-s3/metadata.yaml b/airbyte-integrations/connectors/source-s3/metadata.yaml index 30a6f17867c10..7ab0aad647d59 100644 --- a/airbyte-integrations/connectors/source-s3/metadata.yaml +++ b/airbyte-integrations/connectors/source-s3/metadata.yaml @@ -10,7 +10,7 @@ data: connectorSubtype: file connectorType: source definitionId: 69589781-7828-43c5-9f63-8925b1c1ccc2 - dockerImageTag: 4.12.2 + dockerImageTag: 4.12.3 dockerRepository: airbyte/source-s3 documentationUrl: https://docs.airbyte.com/integrations/sources/s3 githubIssueLabel: source-s3 diff --git a/airbyte-integrations/connectors/source-s3/poetry.lock b/airbyte-integrations/connectors/source-s3/poetry.lock index d19a58e3ae3e1..36e28243f5a38 100644 --- 
a/airbyte-integrations/connectors/source-s3/poetry.lock +++ b/airbyte-integrations/connectors/source-s3/poetry.lock @@ -2,15 +2,15 @@ [[package]] name = "airbyte-cdk" -version = "6.38.5.dev0" +version = "6.39.2" description = "A framework for writing Airbyte Connectors." optional = false python-versions = "<3.13,>=3.10" groups = ["main"] markers = "python_version <= \"3.11\"" files = [ - {file = "airbyte_cdk-6.38.5.dev0-py3-none-any.whl", hash = "sha256:23abf29250fcc9848ffd925afefb2fc1d070a685177485d5bf5e2fc1dd5486a8"}, - {file = "airbyte_cdk-6.38.5.dev0.tar.gz", hash = "sha256:9ec0b0e841c667d23190f76146d0a9f8571367d5b1e308d8d0e0d64ac15c9f46"}, + {file = "airbyte_cdk-6.39.2-py3-none-any.whl", hash = "sha256:7a6a79ed6499a680bc6cfefc1a32f9bd0a483b02ee2959a4d383d98ad7960422"}, + {file = "airbyte_cdk-6.39.2.tar.gz", hash = "sha256:733460a9e08a8d4d1efee2eb74c8502d37bcbd19f017290593c564b7a5741e3b"}, ] [package.dependencies] @@ -33,6 +33,7 @@ markdown = {version = "*", optional = true, markers = "extra == \"file-based\""} nltk = "3.9.1" numpy = "<2" orjson = ">=3.10.7,<4.0.0" +packaging = "*" pandas = "2.2.2" pdf2image = {version = "1.16.3", optional = true, markers = "extra == \"file-based\""} "pdfminer.six" = {version = "20221105", optional = true, markers = "extra == \"file-based\""} @@ -52,6 +53,7 @@ rapidfuzz = ">=3.10.1,<4.0.0" requests = "*" requests_cache = "*" serpyco-rs = ">=1.10.2,<2.0.0" +typing-extensions = "*" unstructured = {version = "0.10.27", extras = ["docx", "pptx"], optional = true, markers = "extra == \"file-based\""} "unstructured.pytesseract" = {version = ">=0.3.12", optional = true, markers = "extra == \"file-based\""} wcmatch = "10.0" @@ -3939,4 +3941,4 @@ files = [ [metadata] lock-version = "2.1" python-versions = "^3.10,<3.12" -content-hash = "9923a26c22bc9568496dfd9f291ec1a3df6c2089e543a5daf281ff468a18cb39" +content-hash = "50ecab2ba13c5098ea448596a04987be3f2dda4b115d08056a9ad6534d9c3d20" diff --git 
a/airbyte-integrations/connectors/source-s3/pyproject.toml b/airbyte-integrations/connectors/source-s3/pyproject.toml index ac4980c4f4e66..ebbb7f8279393 100644 --- a/airbyte-integrations/connectors/source-s3/pyproject.toml +++ b/airbyte-integrations/connectors/source-s3/pyproject.toml @@ -3,7 +3,7 @@ requires = [ "poetry-core>=1.0.0",] build-backend = "poetry.core.masonry.api" [tool.poetry] -version = "4.12.2" +version = "4.12.3" name = "source-s3" description = "Source implementation for S3." authors = [ "Airbyte ",] @@ -22,7 +22,7 @@ wcmatch = "==10.0" dill = "==0.3.4" transformers = "4.38.2" urllib3 = "<2" -airbyte-cdk = {extras = ["file-based"], version = "6.38.5dev0"} +airbyte-cdk = {extras = ["file-based"], version = "^6"} pendulum = "^3.0.0" [tool.poetry.scripts] diff --git a/airbyte-integrations/connectors/source-s3/source_s3/v4/stream_reader.py b/airbyte-integrations/connectors/source-s3/source_s3/v4/stream_reader.py index a445f7d25d012..72e23e90009f2 100644 --- a/airbyte-integrations/connectors/source-s3/source_s3/v4/stream_reader.py +++ b/airbyte-integrations/connectors/source-s3/source_s3/v4/stream_reader.py @@ -33,20 +33,6 @@ class SourceS3StreamReader(AbstractFileBasedStreamReader): - def get_file_acl_permissions(self, file: RemoteFile, logger: logging.Logger) -> Dict[str, Any]: - return {} - - def load_identity_groups(self, logger: logging.Logger) -> Iterable[Dict[str, Any]]: - return [] - - @property - def file_permissions_schema(self) -> Dict[str, Any]: - return {} - - @property - def identities_schema(self) -> Dict[str, Any]: - return {} - FILE_SIZE_LIMIT = 1_500_000_000 def __init__(self): diff --git a/docs/integrations/sources/s3.md b/docs/integrations/sources/s3.md index 193a6ea5536f9..3c955660ac731 100644 --- a/docs/integrations/sources/s3.md +++ b/docs/integrations/sources/s3.md @@ -353,141 +353,142 @@ This connector utilizes the open source [Unstructured](https://unstructured-io.g
Expand to review -| Version | Date | Pull Request | Subject | -|:--------|:-----------|:----------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------| -| 4.12.2 | 2025-02-14 | [53684](https://github.com/airbytehq/airbyte/pull/53684) | Added `pendulum` to the dependencies | -| 4.12.1 | 2025-01-25 | [52509](https://github.com/airbytehq/airbyte/pull/52509) | Update dependencies | -| 4.12.0 | 2025-01-20 | [52030](https://github.com/airbytehq/airbyte/pull/52030) | Promoting release candidate 4.12.0-rc.1 to a main version. | -| 4.12.0-rc.1 | 2025-01-15 | [51474](https://github.com/airbytehq/airbyte/pull/51474) | Bump cdk to have preserve subdirectories (default) in copy raw files functionality | -| 4.11.4 | 2025-01-11 | [51370](https://github.com/airbytehq/airbyte/pull/51370) | Update dependencies | -| 4.11.3 | 2025-01-04 | [50932](https://github.com/airbytehq/airbyte/pull/50932) | Update dependencies | -| 4.11.2 | 2024-12-28 | [50739](https://github.com/airbytehq/airbyte/pull/50739) | Update dependencies | -| 4.11.1 | 2024-12-21 | [49042](https://github.com/airbytehq/airbyte/pull/49042) | Update dependencies | -| 4.11.0 | 2024-12-17 | [49824](https://github.com/airbytehq/airbyte/pull/49824) | Increase file size limit to 1.5GB | -| 4.10.2 | 2024-11-25 | [48613](https://github.com/airbytehq/airbyte/pull/48613) | Starting with this version, the Docker image is now rootless. 
Please note that this and future versions will not be compatible with Airbyte versions earlier than 0.64 | -| 4.10.1 | 2024-11-12 | [48346](https://github.com/airbytehq/airbyte/pull/48346) | Implement file-transfer capabilities | -| 4.9.2 | 2024-11-04 | [48259](https://github.com/airbytehq/airbyte/pull/48259) | Update dependencies | -| 4.9.1 | 2024-10-29 | [47038](https://github.com/airbytehq/airbyte/pull/47038) | Update dependencies | -| 4.9.0 | 2024-10-17 | [46973](https://github.com/airbytehq/airbyte/pull/46973) | Promote releae candidate. | -| 4.9.0-rc.1 | 2024-10-14 | [46298](https://github.com/airbytehq/airbyte/pull/46298) | Migrate to CDK v5 | -| 4.8.5 | 2024-10-12 | [46511](https://github.com/airbytehq/airbyte/pull/46511) | Update dependencies | -| 4.8.4 | 2024-09-28 | [46131](https://github.com/airbytehq/airbyte/pull/46131) | Update dependencies | -| 4.8.3 | 2024-09-21 | [45757](https://github.com/airbytehq/airbyte/pull/45757) | Update dependencies | -| 4.8.2 | 2024-09-14 | [45504](https://github.com/airbytehq/airbyte/pull/45504) | Update dependencies | -| 4.8.1 | 2024-09-07 | [45257](https://github.com/airbytehq/airbyte/pull/45257) | Update dependencies | -| 4.8.0 | 2024-09-03 | [44908](https://github.com/airbytehq/airbyte/pull/44908) | Migrate to CDK v3 | -| 4.7.8 | 2024-08-31 | [45009](https://github.com/airbytehq/airbyte/pull/45009) | Update dependencies | -| 4.7.7 | 2024-08-24 | [44732](https://github.com/airbytehq/airbyte/pull/44732) | Update dependencies | -| 4.7.6 | 2024-08-19 | [44380](https://github.com/airbytehq/airbyte/pull/44380) | Update dependencies | -| 4.7.5 | 2024-08-12 | [43868](https://github.com/airbytehq/airbyte/pull/43868) | Update dependencies | -| 4.7.4 | 2024-08-10 | [43667](https://github.com/airbytehq/airbyte/pull/43667) | Update dependencies | -| 4.7.3 | 2024-08-03 | [43083](https://github.com/airbytehq/airbyte/pull/43083) | Update dependencies | -| 4.7.2 | 2024-07-27 | [42814](https://github.com/airbytehq/airbyte/pull/42814) | 
Update dependencies | -| 4.7.1 | 2024-07-20 | [42205](https://github.com/airbytehq/airbyte/pull/42205) | Update dependencies | -| 4.7.0 | 2024-07-16 | [41934](https://github.com/airbytehq/airbyte/pull/41934) | Update to 3.5.1 CDK | -| 4.6.3 | 2024-07-13 | [41934](https://github.com/airbytehq/airbyte/pull/41934) | Update dependencies | -| 4.6.2 | 2024-07-10 | [41503](https://github.com/airbytehq/airbyte/pull/41503) | Update dependencies | -| 4.6.1 | 2024-07-09 | [40067](https://github.com/airbytehq/airbyte/pull/40067) | Update dependencies | -| 4.6.0 | 2024-06-26 | [39573](https://github.com/airbytehq/airbyte/pull/39573) | Improve performance: update to Airbyte CDK 2.0.0 | -| 4.5.17 | 2024-06-06 | [39214](https://github.com/airbytehq/airbyte/pull/39214) | [autopull] Upgrade base image to v1.2.2 | -| 4.5.16 | 2024-05-29 | [38674](https://github.com/airbytehq/airbyte/pull/38674) | Avoid error on empty stream when running discover | -| 4.5.15 | 2024-05-20 | [38252](https://github.com/airbytehq/airbyte/pull/38252) | Replace AirbyteLogger with logging.Logger | -| 4.5.14 | 2024-05-09 | [38090](https://github.com/airbytehq/airbyte/pull/38090) | Bump python-cdk version to include CSV field length fix | -| 4.5.13 | 2024-05-03 | [37776](https://github.com/airbytehq/airbyte/pull/37776) | Update `airbyte-cdk` to fix the `discovery` command issue | -| 4.5.12 | 2024-04-11 | [37001](https://github.com/airbytehq/airbyte/pull/37001) | Update airbyte-cdk to flush print buffer for every message | -| 4.5.11 | 2024-03-14 | [36160](https://github.com/airbytehq/airbyte/pull/36160) | Bump python-cdk version to include CSV tab delimiter fix | -| 4.5.10 | 2024-03-11 | [35955](https://github.com/airbytehq/airbyte/pull/35955) | Pin `transformers` transitive dependency | -| 4.5.9 | 2024-03-06 | [35857](https://github.com/airbytehq/airbyte/pull/35857) | Bump poetry.lock to upgrade transitive dependency | -| 4.5.8 | 2024-03-04 | [35808](https://github.com/airbytehq/airbyte/pull/35808) | Use 
cached AWS client | -| 4.5.7 | 2024-02-23 | [34895](https://github.com/airbytehq/airbyte/pull/34895) | Run incremental syncs with concurrency | -| 4.5.6 | 2024-02-21 | [35246](https://github.com/airbytehq/airbyte/pull/35246) | Fixes bug that occurred when creating CSV streams with tab delimiter. | -| 4.5.5 | 2024-02-18 | [35392](https://github.com/airbytehq/airbyte/pull/35392) | Add support filtering by start date | -| 4.5.4 | 2024-02-15 | [35055](https://github.com/airbytehq/airbyte/pull/35055) | Temporarily revert concurrency | -| 4.5.3 | 2024-02-12 | [35164](https://github.com/airbytehq/airbyte/pull/35164) | Manage dependencies with Poetry. | -| 4.5.2 | 2024-02-06 | [34930](https://github.com/airbytehq/airbyte/pull/34930) | Bump CDK version to fix issue when SyncMode is missing from catalog | -| 4.5.1 | 2024-02-02 | [31701](https://github.com/airbytehq/airbyte/pull/31701) | Add `region` support | -| 4.5.0 | 2024-02-01 | [34591](https://github.com/airbytehq/airbyte/pull/34591) | Run full refresh syncs concurrently | -| 4.4.1 | 2024-01-30 | [34665](https://github.com/airbytehq/airbyte/pull/34665) | Pin moto & CDK version | -| 4.4.0 | 2024-01-12 | [33818](https://github.com/airbytehq/airbyte/pull/33818) | Add IAM Role Authentication | -| 4.3.1 | 2024-01-04 | [33937](https://github.com/airbytehq/airbyte/pull/33937) | Prepare for airbyte-lib | -| 4.3.0 | 2023-12-14 | [33411](https://github.com/airbytehq/airbyte/pull/33411) | Bump CDK version to auto-set primary key for document file streams and support raw txt files | -| 4.2.4 | 2023-12-06 | [33187](https://github.com/airbytehq/airbyte/pull/33187) | Bump CDK version to hide source-defined primary key | -| 4.2.3 | 2023-11-16 | [32608](https://github.com/airbytehq/airbyte/pull/32608) | Improve document file type parser | -| 4.2.2 | 2023-11-20 | [32677](https://github.com/airbytehq/airbyte/pull/32677) | Only read files with ".zip" extension as zipped files | -| 4.2.1 | 2023-11-13 | 
[32357](https://github.com/airbytehq/airbyte/pull/32357) | Improve spec schema | -| 4.2.0 | 2023-11-02 | [32109](https://github.com/airbytehq/airbyte/pull/32109) | Fix docs; add HTTPS validation for S3 endpoint; fix coverage | -| 4.1.4 | 2023-10-30 | [31904](https://github.com/airbytehq/airbyte/pull/31904) | Update CDK | -| 4.1.3 | 2023-10-25 | [31654](https://github.com/airbytehq/airbyte/pull/31654) | Reduce image size | -| 4.1.2 | 2023-10-23 | [31383](https://github.com/airbytehq/airbyte/pull/31383) | Add handling NoSuchBucket error | -| 4.1.1 | 2023-10-19 | [31601](https://github.com/airbytehq/airbyte/pull/31601) | Base image migration: remove Dockerfile and use the python-connector-base image | -| 4.1.0 | 2023-10-17 | [31340](https://github.com/airbytehq/airbyte/pull/31340) | Add reading files inside zip archive | -| 4.0.5 | 2023-10-16 | [31209](https://github.com/airbytehq/airbyte/pull/31209) | Add experimental Markdown/PDF/Docx file format | -| 4.0.4 | 2023-09-18 | [30476](https://github.com/airbytehq/airbyte/pull/30476) | Remove streams.\*.file_type from source-s3 configuration | -| 4.0.3 | 2023-09-13 | [30387](https://github.com/airbytehq/airbyte/pull/30387) | Bump Airbyte-CDK version to improve messages for record parse errors | -| 4.0.2 | 2023-09-07 | [28639](https://github.com/airbytehq/airbyte/pull/28639) | Always show S3 Key fields | -| 4.0.1 | 2023-09-06 | [30217](https://github.com/airbytehq/airbyte/pull/30217) | Migrate inference error to config errors and avoir sentry alerts | -| 4.0.0 | 2023-09-05 | [29757](https://github.com/airbytehq/airbyte/pull/29757) | New version using file-based CDK | -| 3.1.11 | 2023-08-30 | [29986](https://github.com/airbytehq/airbyte/pull/29986) | Add config error for conversion error | -| 3.1.10 | 2023-08-29 | [29943](https://github.com/airbytehq/airbyte/pull/29943) | Add config error for arrow invalid error | -| 3.1.9 | 2023-08-23 | [29753](https://github.com/airbytehq/airbyte/pull/29753) | Feature parity update for V4 
release | -| 3.1.8 | 2023-08-17 | [29520](https://github.com/airbytehq/airbyte/pull/29520) | Update legacy state and error handling | -| 3.1.7 | 2023-08-17 | [29505](https://github.com/airbytehq/airbyte/pull/29505) | v4 StreamReader and Cursor fixes | -| 3.1.6 | 2023-08-16 | [29480](https://github.com/airbytehq/airbyte/pull/29480) | update Pyarrow to version 12.0.1 | -| 3.1.5 | 2023-08-15 | [29418](https://github.com/airbytehq/airbyte/pull/29418) | Avoid duplicate syncs when migrating from v3 to v4 | -| 3.1.4 | 2023-08-15 | [29382](https://github.com/airbytehq/airbyte/pull/29382) | Handle legacy path prefix & path pattern | -| 3.1.3 | 2023-08-05 | [29028](https://github.com/airbytehq/airbyte/pull/29028) | Update v3 & v4 connector to handle either state message | -| 3.1.2 | 2023-07-29 | [28786](https://github.com/airbytehq/airbyte/pull/28786) | Add a codepath for using the file-based CDK | -| 3.1.1 | 2023-07-26 | [28730](https://github.com/airbytehq/airbyte/pull/28730) | Add human readable error message and improve validation for encoding field when it empty | -| 3.1.0 | 2023-06-26 | [27725](https://github.com/airbytehq/airbyte/pull/27725) | License Update: Elv2 | -| 3.0.3 | 2023-06-23 | [27651](https://github.com/airbytehq/airbyte/pull/27651) | Handle Bucket Access Errors | -| 3.0.2 | 2023-06-22 | [27611](https://github.com/airbytehq/airbyte/pull/27611) | Fix start date | -| 3.0.1 | 2023-06-22 | [27604](https://github.com/airbytehq/airbyte/pull/27604) | Add logging for file reading | -| 3.0.0 | 2023-05-02 | [25127](https://github.com/airbytehq/airbyte/pull/25127) | Remove ab_additional column; Use platform-handled schema evolution | -| 2.2.0 | 2023-05-10 | [25937](https://github.com/airbytehq/airbyte/pull/25937) | Add support for Parquet Dataset | -| 2.1.4 | 2023-05-01 | [25361](https://github.com/airbytehq/airbyte/pull/25361) | Parse nested avro schemas | -| 2.1.3 | 2023-05-01 | [25706](https://github.com/airbytehq/airbyte/pull/25706) | Remove minimum block size 
for CSV check | -| 2.1.2 | 2023-04-18 | [25067](https://github.com/airbytehq/airbyte/pull/25067) | Handle block size related errors; fix config validator | -| 2.1.1 | 2023-04-18 | [25010](https://github.com/airbytehq/airbyte/pull/25010) | Refactor filter logic | -| 2.1.0 | 2023-04-10 | [25010](https://github.com/airbytehq/airbyte/pull/25010) | Add `start_date` field to filter files based on `LastModified` option | -| 2.0.4 | 2023-03-23 | [24429](https://github.com/airbytehq/airbyte/pull/24429) | Call `check` with a little block size to save time and memory. | -| 2.0.3 | 2023-03-17 | [24178](https://github.com/airbytehq/airbyte/pull/24178) | Support legacy datetime format for the period of migration, fix time-zone conversion. | -| 2.0.2 | 2023-03-16 | [24157](https://github.com/airbytehq/airbyte/pull/24157) | Return empty schema if `discover` finds no files; Do not infer extra data types when user defined schema is applied. | -| 2.0.1 | 2023-03-06 | [23195](https://github.com/airbytehq/airbyte/pull/23195) | Fix datetime format string | -| 2.0.0 | 2023-03-14 | [23189](https://github.com/airbytehq/airbyte/pull/23189) | Infer schema based on one file instead of all the files | -| 1.0.2 | 2023-03-02 | [23669](https://github.com/airbytehq/airbyte/pull/23669) | Made `Advanced Reader Options` and `Advanced Options` truly `optional` for `CSV` format | -| 1.0.1 | 2023-02-27 | [23502](https://github.com/airbytehq/airbyte/pull/23502) | Fix error handling | -| 1.0.0 | 2023-02-17 | [23198](https://github.com/airbytehq/airbyte/pull/23198) | Fix Avro schema discovery | -| 0.1.32 | 2023-02-07 | [22500](https://github.com/airbytehq/airbyte/pull/22500) | Speed up discovery | -| 0.1.31 | 2023-02-08 | [22550](https://github.com/airbytehq/airbyte/pull/22550) | Validate CSV read options and convert options | -| 0.1.30 | 2023-01-25 | [21587](https://github.com/airbytehq/airbyte/pull/21587) | Make sure spec works as expected in UI | -| 0.1.29 | 2023-01-19 | 
[21604](https://github.com/airbytehq/airbyte/pull/21604) | Handle OSError: skip unreachable keys and keep working on accessible ones. Warn a customer | -| 0.1.28 | 2023-01-10 | [21210](https://github.com/airbytehq/airbyte/pull/21210) | Update block size for json file format | -| 0.1.27 | 2022-12-08 | [20262](https://github.com/airbytehq/airbyte/pull/20262) | Check config settings for CSV file format | -| 0.1.26 | 2022-11-08 | [19006](https://github.com/airbytehq/airbyte/pull/19006) | Add virtual-hosted-style option | -| 0.1.24 | 2022-10-28 | [18602](https://github.com/airbytehq/airbyte/pull/18602) | Wrap errors into AirbyteTracedException pointing to a problem file | -| 0.1.23 | 2022-10-10 | [17800](https://github.com/airbytehq/airbyte/pull/17800) | Deleted `use_ssl` and `verify_ssl_cert` flags and hardcoded to `True` | -| 0.1.23 | 2022-10-10 | [17991](https://github.com/airbytehq/airbyte/pull/17991) | Fix pyarrow to JSON schema type conversion for arrays | -| 0.1.22 | 2022-09-28 | [17304](https://github.com/airbytehq/airbyte/pull/17304) | Migrate to per-stream state | -| 0.1.21 | 2022-09-20 | [16921](https://github.com/airbytehq/airbyte/pull/16921) | Upgrade pyarrow | -| 0.1.20 | 2022-09-12 | [16607](https://github.com/airbytehq/airbyte/pull/16607) | Fix for reading jsonl files containing nested structures | -| 0.1.19 | 2022-09-13 | [16631](https://github.com/airbytehq/airbyte/pull/16631) | Adjust column type to a broadest one when merging two or more json schemas | -| 0.1.18 | 2022-08-01 | [14213](https://github.com/airbytehq/airbyte/pull/14213) | Add support for jsonl format files. 
| -| 0.1.17 | 2022-07-21 | [14911](https://github.com/airbytehq/airbyte/pull/14911) | "decimal" type added for parquet | -| 0.1.16 | 2022-07-13 | [14669](https://github.com/airbytehq/airbyte/pull/14669) | Fixed bug when extra columns apeared to be non-present in master schema | -| 0.1.15 | 2022-05-31 | [12568](https://github.com/airbytehq/airbyte/pull/12568) | Fixed possible case of files being missed during incremental syncs | -| 0.1.14 | 2022-05-23 | [11967](https://github.com/airbytehq/airbyte/pull/11967) | Increase unit test coverage up to 90% | -| 0.1.13 | 2022-05-11 | [12730](https://github.com/airbytehq/airbyte/pull/12730) | Fixed empty options issue | -| 0.1.12 | 2022-05-11 | [12602](https://github.com/airbytehq/airbyte/pull/12602) | Added support for Avro file format | -| 0.1.11 | 2022-04-30 | [12500](https://github.com/airbytehq/airbyte/pull/12500) | Improve input configuration copy | -| 0.1.10 | 2022-01-28 | [8252](https://github.com/airbytehq/airbyte/pull/8252) | Refactoring of files' metadata | -| 0.1.9 | 2022-01-06 | [9163](https://github.com/airbytehq/airbyte/pull/9163) | Work-around for web-UI, `backslash - t` converts to `tab` for `format.delimiter` field. | -| 0.1.7 | 2021-11-08 | [7499](https://github.com/airbytehq/airbyte/pull/7499) | Remove base-python dependencies | -| 0.1.6 | 2021-10-15 | [6615](https://github.com/airbytehq/airbyte/pull/6615) & [7058](https://github.com/airbytehq/airbyte/pull/7058) | Memory and performance optimisation. Advanced options for CSV parsing. 
| -| 0.1.5 | 2021-09-24 | [6398](https://github.com/airbytehq/airbyte/pull/6398) | Support custom non Amazon S3 services | -| 0.1.4 | 2021-08-13 | [5305](https://github.com/airbytehq/airbyte/pull/5305) | Support of Parquet format | -| 0.1.3 | 2021-08-04 | [5197](https://github.com/airbytehq/airbyte/pull/5197) | Fixed bug where sync could hang indefinitely on schema inference | -| 0.1.2 | 2021-08-02 | [5135](https://github.com/airbytehq/airbyte/pull/5135) | Fixed bug in spec so it displays in UI correctly | -| 0.1.1 | 2021-07-30 | [4990](https://github.com/airbytehq/airbyte/pull/4990/commits/ff5f70662c5f84eabc03526cddfcc9d73c58c0f4) | Fixed documentation url in source definition | -| 0.1.0 | 2021-07-30 | [4990](https://github.com/airbytehq/airbyte/pull/4990) | Created S3 source connector | +| Version | Date | Pull Request | Subject | +|:------------|:-----------|:----------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| 4.12.3 | 2025-03-13 | [55694](https://github.com/airbytehq/airbyte/pull/55694) | Fix bug where csv column name is 'type' | +| 4.12.2 | 2025-02-14 | [53684](https://github.com/airbytehq/airbyte/pull/53684) | Added `pendulum` to the dependencies | +| 4.12.1 | 2025-01-25 | [52509](https://github.com/airbytehq/airbyte/pull/52509) | Update dependencies | +| 4.12.0 | 2025-01-20 | [52030](https://github.com/airbytehq/airbyte/pull/52030) | Promoting release candidate 4.12.0-rc.1 to a main version. 
| +| 4.12.0-rc.1 | 2025-01-15 | [51474](https://github.com/airbytehq/airbyte/pull/51474) | Bump cdk to have preserve subdirectories (default) in copy raw files functionality | +| 4.11.4 | 2025-01-11 | [51370](https://github.com/airbytehq/airbyte/pull/51370) | Update dependencies | +| 4.11.3 | 2025-01-04 | [50932](https://github.com/airbytehq/airbyte/pull/50932) | Update dependencies | +| 4.11.2 | 2024-12-28 | [50739](https://github.com/airbytehq/airbyte/pull/50739) | Update dependencies | +| 4.11.1 | 2024-12-21 | [49042](https://github.com/airbytehq/airbyte/pull/49042) | Update dependencies | +| 4.11.0 | 2024-12-17 | [49824](https://github.com/airbytehq/airbyte/pull/49824) | Increase file size limit to 1.5GB | +| 4.10.2 | 2024-11-25 | [48613](https://github.com/airbytehq/airbyte/pull/48613) | Starting with this version, the Docker image is now rootless. Please note that this and future versions will not be compatible with Airbyte versions earlier than 0.64 | +| 4.10.1 | 2024-11-12 | [48346](https://github.com/airbytehq/airbyte/pull/48346) | Implement file-transfer capabilities | +| 4.9.2 | 2024-11-04 | [48259](https://github.com/airbytehq/airbyte/pull/48259) | Update dependencies | +| 4.9.1 | 2024-10-29 | [47038](https://github.com/airbytehq/airbyte/pull/47038) | Update dependencies | +| 4.9.0 | 2024-10-17 | [46973](https://github.com/airbytehq/airbyte/pull/46973) | Promote releae candidate. 
| +| 4.9.0-rc.1 | 2024-10-14 | [46298](https://github.com/airbytehq/airbyte/pull/46298) | Migrate to CDK v5 | +| 4.8.5 | 2024-10-12 | [46511](https://github.com/airbytehq/airbyte/pull/46511) | Update dependencies | +| 4.8.4 | 2024-09-28 | [46131](https://github.com/airbytehq/airbyte/pull/46131) | Update dependencies | +| 4.8.3 | 2024-09-21 | [45757](https://github.com/airbytehq/airbyte/pull/45757) | Update dependencies | +| 4.8.2 | 2024-09-14 | [45504](https://github.com/airbytehq/airbyte/pull/45504) | Update dependencies | +| 4.8.1 | 2024-09-07 | [45257](https://github.com/airbytehq/airbyte/pull/45257) | Update dependencies | +| 4.8.0 | 2024-09-03 | [44908](https://github.com/airbytehq/airbyte/pull/44908) | Migrate to CDK v3 | +| 4.7.8 | 2024-08-31 | [45009](https://github.com/airbytehq/airbyte/pull/45009) | Update dependencies | +| 4.7.7 | 2024-08-24 | [44732](https://github.com/airbytehq/airbyte/pull/44732) | Update dependencies | +| 4.7.6 | 2024-08-19 | [44380](https://github.com/airbytehq/airbyte/pull/44380) | Update dependencies | +| 4.7.5 | 2024-08-12 | [43868](https://github.com/airbytehq/airbyte/pull/43868) | Update dependencies | +| 4.7.4 | 2024-08-10 | [43667](https://github.com/airbytehq/airbyte/pull/43667) | Update dependencies | +| 4.7.3 | 2024-08-03 | [43083](https://github.com/airbytehq/airbyte/pull/43083) | Update dependencies | +| 4.7.2 | 2024-07-27 | [42814](https://github.com/airbytehq/airbyte/pull/42814) | Update dependencies | +| 4.7.1 | 2024-07-20 | [42205](https://github.com/airbytehq/airbyte/pull/42205) | Update dependencies | +| 4.7.0 | 2024-07-16 | [41934](https://github.com/airbytehq/airbyte/pull/41934) | Update to 3.5.1 CDK | +| 4.6.3 | 2024-07-13 | [41934](https://github.com/airbytehq/airbyte/pull/41934) | Update dependencies | +| 4.6.2 | 2024-07-10 | [41503](https://github.com/airbytehq/airbyte/pull/41503) | Update dependencies | +| 4.6.1 | 2024-07-09 | [40067](https://github.com/airbytehq/airbyte/pull/40067) | Update dependencies | 
+| 4.6.0 | 2024-06-26 | [39573](https://github.com/airbytehq/airbyte/pull/39573) | Improve performance: update to Airbyte CDK 2.0.0 | +| 4.5.17 | 2024-06-06 | [39214](https://github.com/airbytehq/airbyte/pull/39214) | [autopull] Upgrade base image to v1.2.2 | +| 4.5.16 | 2024-05-29 | [38674](https://github.com/airbytehq/airbyte/pull/38674) | Avoid error on empty stream when running discover | +| 4.5.15 | 2024-05-20 | [38252](https://github.com/airbytehq/airbyte/pull/38252) | Replace AirbyteLogger with logging.Logger | +| 4.5.14 | 2024-05-09 | [38090](https://github.com/airbytehq/airbyte/pull/38090) | Bump python-cdk version to include CSV field length fix | +| 4.5.13 | 2024-05-03 | [37776](https://github.com/airbytehq/airbyte/pull/37776) | Update `airbyte-cdk` to fix the `discovery` command issue | +| 4.5.12 | 2024-04-11 | [37001](https://github.com/airbytehq/airbyte/pull/37001) | Update airbyte-cdk to flush print buffer for every message | +| 4.5.11 | 2024-03-14 | [36160](https://github.com/airbytehq/airbyte/pull/36160) | Bump python-cdk version to include CSV tab delimiter fix | +| 4.5.10 | 2024-03-11 | [35955](https://github.com/airbytehq/airbyte/pull/35955) | Pin `transformers` transitive dependency | +| 4.5.9 | 2024-03-06 | [35857](https://github.com/airbytehq/airbyte/pull/35857) | Bump poetry.lock to upgrade transitive dependency | +| 4.5.8 | 2024-03-04 | [35808](https://github.com/airbytehq/airbyte/pull/35808) | Use cached AWS client | +| 4.5.7 | 2024-02-23 | [34895](https://github.com/airbytehq/airbyte/pull/34895) | Run incremental syncs with concurrency | +| 4.5.6 | 2024-02-21 | [35246](https://github.com/airbytehq/airbyte/pull/35246) | Fixes bug that occurred when creating CSV streams with tab delimiter. 
| +| 4.5.5 | 2024-02-18 | [35392](https://github.com/airbytehq/airbyte/pull/35392) | Add support filtering by start date | +| 4.5.4 | 2024-02-15 | [35055](https://github.com/airbytehq/airbyte/pull/35055) | Temporarily revert concurrency | +| 4.5.3 | 2024-02-12 | [35164](https://github.com/airbytehq/airbyte/pull/35164) | Manage dependencies with Poetry. | +| 4.5.2 | 2024-02-06 | [34930](https://github.com/airbytehq/airbyte/pull/34930) | Bump CDK version to fix issue when SyncMode is missing from catalog | +| 4.5.1 | 2024-02-02 | [31701](https://github.com/airbytehq/airbyte/pull/31701) | Add `region` support | +| 4.5.0 | 2024-02-01 | [34591](https://github.com/airbytehq/airbyte/pull/34591) | Run full refresh syncs concurrently | +| 4.4.1 | 2024-01-30 | [34665](https://github.com/airbytehq/airbyte/pull/34665) | Pin moto & CDK version | +| 4.4.0 | 2024-01-12 | [33818](https://github.com/airbytehq/airbyte/pull/33818) | Add IAM Role Authentication | +| 4.3.1 | 2024-01-04 | [33937](https://github.com/airbytehq/airbyte/pull/33937) | Prepare for airbyte-lib | +| 4.3.0 | 2023-12-14 | [33411](https://github.com/airbytehq/airbyte/pull/33411) | Bump CDK version to auto-set primary key for document file streams and support raw txt files | +| 4.2.4 | 2023-12-06 | [33187](https://github.com/airbytehq/airbyte/pull/33187) | Bump CDK version to hide source-defined primary key | +| 4.2.3 | 2023-11-16 | [32608](https://github.com/airbytehq/airbyte/pull/32608) | Improve document file type parser | +| 4.2.2 | 2023-11-20 | [32677](https://github.com/airbytehq/airbyte/pull/32677) | Only read files with ".zip" extension as zipped files | +| 4.2.1 | 2023-11-13 | [32357](https://github.com/airbytehq/airbyte/pull/32357) | Improve spec schema | +| 4.2.0 | 2023-11-02 | [32109](https://github.com/airbytehq/airbyte/pull/32109) | Fix docs; add HTTPS validation for S3 endpoint; fix coverage | +| 4.1.4 | 2023-10-30 | [31904](https://github.com/airbytehq/airbyte/pull/31904) | Update CDK | +| 4.1.3 | 
2023-10-25 | [31654](https://github.com/airbytehq/airbyte/pull/31654) | Reduce image size | +| 4.1.2 | 2023-10-23 | [31383](https://github.com/airbytehq/airbyte/pull/31383) | Add handling NoSuchBucket error | +| 4.1.1 | 2023-10-19 | [31601](https://github.com/airbytehq/airbyte/pull/31601) | Base image migration: remove Dockerfile and use the python-connector-base image | +| 4.1.0 | 2023-10-17 | [31340](https://github.com/airbytehq/airbyte/pull/31340) | Add reading files inside zip archive | +| 4.0.5 | 2023-10-16 | [31209](https://github.com/airbytehq/airbyte/pull/31209) | Add experimental Markdown/PDF/Docx file format | +| 4.0.4 | 2023-09-18 | [30476](https://github.com/airbytehq/airbyte/pull/30476) | Remove streams.\*.file_type from source-s3 configuration | +| 4.0.3 | 2023-09-13 | [30387](https://github.com/airbytehq/airbyte/pull/30387) | Bump Airbyte-CDK version to improve messages for record parse errors | +| 4.0.2 | 2023-09-07 | [28639](https://github.com/airbytehq/airbyte/pull/28639) | Always show S3 Key fields | +| 4.0.1 | 2023-09-06 | [30217](https://github.com/airbytehq/airbyte/pull/30217) | Migrate inference error to config errors and avoir sentry alerts | +| 4.0.0 | 2023-09-05 | [29757](https://github.com/airbytehq/airbyte/pull/29757) | New version using file-based CDK | +| 3.1.11 | 2023-08-30 | [29986](https://github.com/airbytehq/airbyte/pull/29986) | Add config error for conversion error | +| 3.1.10 | 2023-08-29 | [29943](https://github.com/airbytehq/airbyte/pull/29943) | Add config error for arrow invalid error | +| 3.1.9 | 2023-08-23 | [29753](https://github.com/airbytehq/airbyte/pull/29753) | Feature parity update for V4 release | +| 3.1.8 | 2023-08-17 | [29520](https://github.com/airbytehq/airbyte/pull/29520) | Update legacy state and error handling | +| 3.1.7 | 2023-08-17 | [29505](https://github.com/airbytehq/airbyte/pull/29505) | v4 StreamReader and Cursor fixes | +| 3.1.6 | 2023-08-16 | [29480](https://github.com/airbytehq/airbyte/pull/29480) 
| update Pyarrow to version 12.0.1 | +| 3.1.5 | 2023-08-15 | [29418](https://github.com/airbytehq/airbyte/pull/29418) | Avoid duplicate syncs when migrating from v3 to v4 | +| 3.1.4 | 2023-08-15 | [29382](https://github.com/airbytehq/airbyte/pull/29382) | Handle legacy path prefix & path pattern | +| 3.1.3 | 2023-08-05 | [29028](https://github.com/airbytehq/airbyte/pull/29028) | Update v3 & v4 connector to handle either state message | +| 3.1.2 | 2023-07-29 | [28786](https://github.com/airbytehq/airbyte/pull/28786) | Add a codepath for using the file-based CDK | +| 3.1.1 | 2023-07-26 | [28730](https://github.com/airbytehq/airbyte/pull/28730) | Add human readable error message and improve validation for encoding field when it is empty | +| 3.1.0 | 2023-06-26 | [27725](https://github.com/airbytehq/airbyte/pull/27725) | License Update: Elv2 | +| 3.0.3 | 2023-06-23 | [27651](https://github.com/airbytehq/airbyte/pull/27651) | Handle Bucket Access Errors | +| 3.0.2 | 2023-06-22 | [27611](https://github.com/airbytehq/airbyte/pull/27611) | Fix start date | +| 3.0.1 | 2023-06-22 | [27604](https://github.com/airbytehq/airbyte/pull/27604) | Add logging for file reading | +| 3.0.0 | 2023-05-02 | [25127](https://github.com/airbytehq/airbyte/pull/25127) | Remove ab_additional column; Use platform-handled schema evolution | +| 2.2.0 | 2023-05-10 | [25937](https://github.com/airbytehq/airbyte/pull/25937) | Add support for Parquet Dataset | +| 2.1.4 | 2023-05-01 | [25361](https://github.com/airbytehq/airbyte/pull/25361) | Parse nested avro schemas | +| 2.1.3 | 2023-05-01 | [25706](https://github.com/airbytehq/airbyte/pull/25706) | Remove minimum block size for CSV check | +| 2.1.2 | 2023-04-18 | [25067](https://github.com/airbytehq/airbyte/pull/25067) | Handle block size related errors; fix config validator | +| 2.1.1 | 2023-04-18 | [25010](https://github.com/airbytehq/airbyte/pull/25010) | Refactor filter logic | +| 2.1.0 | 2023-04-10 |
[25010](https://github.com/airbytehq/airbyte/pull/25010) | Add `start_date` field to filter files based on `LastModified` option | +| 2.0.4 | 2023-03-23 | [24429](https://github.com/airbytehq/airbyte/pull/24429) | Call `check` with a little block size to save time and memory. | +| 2.0.3 | 2023-03-17 | [24178](https://github.com/airbytehq/airbyte/pull/24178) | Support legacy datetime format for the period of migration, fix time-zone conversion. | +| 2.0.2 | 2023-03-16 | [24157](https://github.com/airbytehq/airbyte/pull/24157) | Return empty schema if `discover` finds no files; Do not infer extra data types when user defined schema is applied. | +| 2.0.1 | 2023-03-06 | [23195](https://github.com/airbytehq/airbyte/pull/23195) | Fix datetime format string | +| 2.0.0 | 2023-03-14 | [23189](https://github.com/airbytehq/airbyte/pull/23189) | Infer schema based on one file instead of all the files | +| 1.0.2 | 2023-03-02 | [23669](https://github.com/airbytehq/airbyte/pull/23669) | Made `Advanced Reader Options` and `Advanced Options` truly `optional` for `CSV` format | +| 1.0.1 | 2023-02-27 | [23502](https://github.com/airbytehq/airbyte/pull/23502) | Fix error handling | +| 1.0.0 | 2023-02-17 | [23198](https://github.com/airbytehq/airbyte/pull/23198) | Fix Avro schema discovery | +| 0.1.32 | 2023-02-07 | [22500](https://github.com/airbytehq/airbyte/pull/22500) | Speed up discovery | +| 0.1.31 | 2023-02-08 | [22550](https://github.com/airbytehq/airbyte/pull/22550) | Validate CSV read options and convert options | +| 0.1.30 | 2023-01-25 | [21587](https://github.com/airbytehq/airbyte/pull/21587) | Make sure spec works as expected in UI | +| 0.1.29 | 2023-01-19 | [21604](https://github.com/airbytehq/airbyte/pull/21604) | Handle OSError: skip unreachable keys and keep working on accessible ones. 
Warn a customer | +| 0.1.28 | 2023-01-10 | [21210](https://github.com/airbytehq/airbyte/pull/21210) | Update block size for json file format | +| 0.1.27 | 2022-12-08 | [20262](https://github.com/airbytehq/airbyte/pull/20262) | Check config settings for CSV file format | +| 0.1.26 | 2022-11-08 | [19006](https://github.com/airbytehq/airbyte/pull/19006) | Add virtual-hosted-style option | +| 0.1.24 | 2022-10-28 | [18602](https://github.com/airbytehq/airbyte/pull/18602) | Wrap errors into AirbyteTracedException pointing to a problem file | +| 0.1.23 | 2022-10-10 | [17800](https://github.com/airbytehq/airbyte/pull/17800) | Deleted `use_ssl` and `verify_ssl_cert` flags and hardcoded to `True` | +| 0.1.23 | 2022-10-10 | [17991](https://github.com/airbytehq/airbyte/pull/17991) | Fix pyarrow to JSON schema type conversion for arrays | +| 0.1.22 | 2022-09-28 | [17304](https://github.com/airbytehq/airbyte/pull/17304) | Migrate to per-stream state | +| 0.1.21 | 2022-09-20 | [16921](https://github.com/airbytehq/airbyte/pull/16921) | Upgrade pyarrow | +| 0.1.20 | 2022-09-12 | [16607](https://github.com/airbytehq/airbyte/pull/16607) | Fix for reading jsonl files containing nested structures | +| 0.1.19 | 2022-09-13 | [16631](https://github.com/airbytehq/airbyte/pull/16631) | Adjust column type to a broadest one when merging two or more json schemas | +| 0.1.18 | 2022-08-01 | [14213](https://github.com/airbytehq/airbyte/pull/14213) | Add support for jsonl format files. 
| +| 0.1.17 | 2022-07-21 | [14911](https://github.com/airbytehq/airbyte/pull/14911) | "decimal" type added for parquet | +| 0.1.16 | 2022-07-13 | [14669](https://github.com/airbytehq/airbyte/pull/14669) | Fixed bug when extra columns appeared to be non-present in master schema | +| 0.1.15 | 2022-05-31 | [12568](https://github.com/airbytehq/airbyte/pull/12568) | Fixed possible case of files being missed during incremental syncs | +| 0.1.14 | 2022-05-23 | [11967](https://github.com/airbytehq/airbyte/pull/11967) | Increase unit test coverage up to 90% | +| 0.1.13 | 2022-05-11 | [12730](https://github.com/airbytehq/airbyte/pull/12730) | Fixed empty options issue | +| 0.1.12 | 2022-05-11 | [12602](https://github.com/airbytehq/airbyte/pull/12602) | Added support for Avro file format | +| 0.1.11 | 2022-04-30 | [12500](https://github.com/airbytehq/airbyte/pull/12500) | Improve input configuration copy | +| 0.1.10 | 2022-01-28 | [8252](https://github.com/airbytehq/airbyte/pull/8252) | Refactoring of files' metadata | +| 0.1.9 | 2022-01-06 | [9163](https://github.com/airbytehq/airbyte/pull/9163) | Work-around for web-UI, `backslash - t` converts to `tab` for `format.delimiter` field. | +| 0.1.7 | 2021-11-08 | [7499](https://github.com/airbytehq/airbyte/pull/7499) | Remove base-python dependencies | +| 0.1.6 | 2021-10-15 | [6615](https://github.com/airbytehq/airbyte/pull/6615) & [7058](https://github.com/airbytehq/airbyte/pull/7058) | Memory and performance optimisation. Advanced options for CSV parsing.
| +| 0.1.5 | 2021-09-24 | [6398](https://github.com/airbytehq/airbyte/pull/6398) | Support custom non Amazon S3 services | +| 0.1.4 | 2021-08-13 | [5305](https://github.com/airbytehq/airbyte/pull/5305) | Support of Parquet format | +| 0.1.3 | 2021-08-04 | [5197](https://github.com/airbytehq/airbyte/pull/5197) | Fixed bug where sync could hang indefinitely on schema inference | +| 0.1.2 | 2021-08-02 | [5135](https://github.com/airbytehq/airbyte/pull/5135) | Fixed bug in spec so it displays in UI correctly | +| 0.1.1 | 2021-07-30 | [4990](https://github.com/airbytehq/airbyte/pull/4990/commits/ff5f70662c5f84eabc03526cddfcc9d73c58c0f4) | Fixed documentation url in source definition | +| 0.1.0 | 2021-07-30 | [4990](https://github.com/airbytehq/airbyte/pull/4990) | Created S3 source connector |
From f4b446642554e3a0ccf67ebe9a28b5f8c34ef882 Mon Sep 17 00:00:00 2001 From: maxi297 Date: Wed, 12 Mar 2025 21:26:54 -0400 Subject: [PATCH 3/3] remove unused import --- .../connectors/source-s3/source_s3/v4/stream_reader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/airbyte-integrations/connectors/source-s3/source_s3/v4/stream_reader.py b/airbyte-integrations/connectors/source-s3/source_s3/v4/stream_reader.py index 72e23e90009f2..17bc50101df3d 100644 --- a/airbyte-integrations/connectors/source-s3/source_s3/v4/stream_reader.py +++ b/airbyte-integrations/connectors/source-s3/source_s3/v4/stream_reader.py @@ -6,8 +6,8 @@ import time from datetime import datetime from io import IOBase -from os import getenv, makedirs, path -from typing import Any, Dict, Iterable, List, Optional, Set, cast +from os import getenv +from typing import Dict, Iterable, List, Optional, Set, cast import boto3.session import pendulum