From 2dc66c75e80e9b88a6597058d43ce5bf3c4e636c Mon Sep 17 00:00:00 2001 From: Luka Peschke Date: Mon, 2 Dec 2024 17:50:10 +0100 Subject: [PATCH] refactor(azure_mssql): use sqlalchemy for connections [TCTC-9751] (#1841) * chore(deps): add sqlalchemy Signed-off-by: Luka Peschke * refactor(azure_mssql): use sqlalchemy for connections [TCTC-9751] Signed-off-by: Luka Peschke * feat: add typing Signed-off-by: Luka Peschke * fix(deps): add sqlalchemy to "all" extra Signed-off-by: Luka Peschke * Update toucan_connectors/common.py * fix: use .model_fields in mongo connector Signed-off-by: Luka Peschke --------- Signed-off-by: Luka Peschke --- .github/workflows/ci.yml | 5 + CHANGELOG.md | 4 + poetry.lock | 203 +++++++++++++++++- pyproject.toml | 30 +-- tests/azure_mssql/fixtures/world.sql | 59 +++++ tests/azure_mssql/test_azure_mssql.py | 76 ++++--- tests/test_common.py | 16 ++ .../azure_mssql/azure_mssql_connector.py | 65 +++--- toucan_connectors/common.py | 78 +++++-- toucan_connectors/mongo/mongo_connector.py | 2 +- 10 files changed, 436 insertions(+), 102 deletions(-) create mode 100644 tests/azure_mssql/fixtures/world.sql diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0e1bbfa54..682aca6fd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -86,6 +86,11 @@ jobs: ATHENA_REGION: '${{ secrets.ATHENA_REGION }}' # GBQ GOOGLE_BIG_QUERY_CREDENTIALS: '${{ secrets.GOOGLE_BIG_QUERY_CREDENTIALS }}' + # Azure MSSQL + AZURE_MSSQL_USER: '${{ secrets.AZURE_MSSQL_USER }}' + AZURE_MSSQL_PASSWORD: '${{ secrets.AZURE_MSSQL_PASSWORD }}' + AZURE_MSSQL_HOST: '${{ secrets.AZURE_MSSQL_HOST }}' + AZURE_MSSQL_DATABASE: '${{ secrets.AZURE_MSSQL_DATABASE }}' - name: SonarCloud Scan # Only executed for one of the tested python version and pandas version diff --git a/CHANGELOG.md b/CHANGELOG.md index c106d7424..5565420ff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## Unreleased +### Changed + +- The Azure MSSQL connector now uses 
`sqlalchemy` to connect to MSSQL. + ## [7.3.3] 2024-11-21 ### Fixed diff --git a/poetry.lock b/poetry.lock index ca0415c4f..a5ce0a5ca 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand. [[package]] name = "aiobotocore" @@ -1323,6 +1323,92 @@ protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.1 || >4.21.1,<4 [package.extras] grpc = ["grpcio (>=1.44.0,<2.0.0.dev0)"] +[[package]] +name = "greenlet" +version = "3.1.1" +description = "Lightweight in-process concurrent programming" +optional = true +python-versions = ">=3.7" +files = [ + {file = "greenlet-3.1.1-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:0bbae94a29c9e5c7e4a2b7f0aae5c17e8e90acbfd3bf6270eeba60c39fce3563"}, + {file = "greenlet-3.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0fde093fb93f35ca72a556cf72c92ea3ebfda3d79fc35bb19fbe685853869a83"}, + {file = "greenlet-3.1.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:36b89d13c49216cadb828db8dfa6ce86bbbc476a82d3a6c397f0efae0525bdd0"}, + {file = "greenlet-3.1.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:94b6150a85e1b33b40b1464a3f9988dcc5251d6ed06842abff82e42632fac120"}, + {file = "greenlet-3.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:93147c513fac16385d1036b7e5b102c7fbbdb163d556b791f0f11eada7ba65dc"}, + {file = "greenlet-3.1.1-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:da7a9bff22ce038e19bf62c4dd1ec8391062878710ded0a845bcf47cc0200617"}, + {file = "greenlet-3.1.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:b2795058c23988728eec1f36a4e5e4ebad22f8320c85f3587b539b9ac84128d7"}, + {file = "greenlet-3.1.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = 
"sha256:ed10eac5830befbdd0c32f83e8aa6288361597550ba669b04c48f0f9a2c843c6"}, + {file = "greenlet-3.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:77c386de38a60d1dfb8e55b8c1101d68c79dfdd25c7095d51fec2dd800892b80"}, + {file = "greenlet-3.1.1-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:e4d333e558953648ca09d64f13e6d8f0523fa705f51cae3f03b5983489958c70"}, + {file = "greenlet-3.1.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:09fc016b73c94e98e29af67ab7b9a879c307c6731a2c9da0db5a7d9b7edd1159"}, + {file = "greenlet-3.1.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d5e975ca70269d66d17dd995dafc06f1b06e8cb1ec1e9ed54c1d1e4a7c4cf26e"}, + {file = "greenlet-3.1.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3b2813dc3de8c1ee3f924e4d4227999285fd335d1bcc0d2be6dc3f1f6a318ec1"}, + {file = "greenlet-3.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e347b3bfcf985a05e8c0b7d462ba6f15b1ee1c909e2dcad795e49e91b152c383"}, + {file = "greenlet-3.1.1-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9e8f8c9cb53cdac7ba9793c276acd90168f416b9ce36799b9b885790f8ad6c0a"}, + {file = "greenlet-3.1.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:62ee94988d6b4722ce0028644418d93a52429e977d742ca2ccbe1c4f4a792511"}, + {file = "greenlet-3.1.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:1776fd7f989fc6b8d8c8cb8da1f6b82c5814957264d1f6cf818d475ec2bf6395"}, + {file = "greenlet-3.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:48ca08c771c268a768087b408658e216133aecd835c0ded47ce955381105ba39"}, + {file = "greenlet-3.1.1-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:4afe7ea89de619adc868e087b4d2359282058479d7cfb94970adf4b55284574d"}, + {file = "greenlet-3.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f406b22b7c9a9b4f8aa9d2ab13d6ae0ac3e85c9a809bd590ad53fed2bf70dc79"}, + {file = 
"greenlet-3.1.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c3a701fe5a9695b238503ce5bbe8218e03c3bcccf7e204e455e7462d770268aa"}, + {file = "greenlet-3.1.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2846930c65b47d70b9d178e89c7e1a69c95c1f68ea5aa0a58646b7a96df12441"}, + {file = "greenlet-3.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:99cfaa2110534e2cf3ba31a7abcac9d328d1d9f1b95beede58294a60348fba36"}, + {file = "greenlet-3.1.1-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1443279c19fca463fc33e65ef2a935a5b09bb90f978beab37729e1c3c6c25fe9"}, + {file = "greenlet-3.1.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:b7cede291382a78f7bb5f04a529cb18e068dd29e0fb27376074b6d0317bf4dd0"}, + {file = "greenlet-3.1.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:23f20bb60ae298d7d8656c6ec6db134bca379ecefadb0b19ce6f19d1f232a942"}, + {file = "greenlet-3.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:7124e16b4c55d417577c2077be379514321916d5790fa287c9ed6f23bd2ffd01"}, + {file = "greenlet-3.1.1-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:05175c27cb459dcfc05d026c4232f9de8913ed006d42713cb8a5137bd49375f1"}, + {file = "greenlet-3.1.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:935e943ec47c4afab8965954bf49bfa639c05d4ccf9ef6e924188f762145c0ff"}, + {file = "greenlet-3.1.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:667a9706c970cb552ede35aee17339a18e8f2a87a51fba2ed39ceeeb1004798a"}, + {file = "greenlet-3.1.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b8a678974d1f3aa55f6cc34dc480169d58f2e6d8958895d68845fa4ab566509e"}, + {file = "greenlet-3.1.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:efc0f674aa41b92da8c49e0346318c6075d734994c3c4e4430b1c3f853e498e4"}, + {file = 
"greenlet-3.1.1-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0153404a4bb921f0ff1abeb5ce8a5131da56b953eda6e14b88dc6bbc04d2049e"}, + {file = "greenlet-3.1.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:275f72decf9932639c1c6dd1013a1bc266438eb32710016a1c742df5da6e60a1"}, + {file = "greenlet-3.1.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:c4aab7f6381f38a4b42f269057aee279ab0fc7bf2e929e3d4abfae97b682a12c"}, + {file = "greenlet-3.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:b42703b1cf69f2aa1df7d1030b9d77d3e584a70755674d60e710f0af570f3761"}, + {file = "greenlet-3.1.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f1695e76146579f8c06c1509c7ce4dfe0706f49c6831a817ac04eebb2fd02011"}, + {file = "greenlet-3.1.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7876452af029456b3f3549b696bb36a06db7c90747740c5302f74a9e9fa14b13"}, + {file = "greenlet-3.1.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4ead44c85f8ab905852d3de8d86f6f8baf77109f9da589cb4fa142bd3b57b475"}, + {file = "greenlet-3.1.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8320f64b777d00dd7ccdade271eaf0cad6636343293a25074cc5566160e4de7b"}, + {file = "greenlet-3.1.1-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6510bf84a6b643dabba74d3049ead221257603a253d0a9873f55f6a59a65f822"}, + {file = "greenlet-3.1.1-cp313-cp313t-musllinux_1_1_aarch64.whl", hash = "sha256:04b013dc07c96f83134b1e99888e7a79979f1a247e2a9f59697fa14b5862ed01"}, + {file = "greenlet-3.1.1-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:411f015496fec93c1c8cd4e5238da364e1da7a124bcb293f085bf2860c32c6f6"}, + {file = "greenlet-3.1.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:47da355d8687fd65240c364c90a31569a133b7b60de111c255ef5b606f2ae291"}, + {file = 
"greenlet-3.1.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:98884ecf2ffb7d7fe6bd517e8eb99d31ff7855a840fa6d0d63cd07c037f6a981"}, + {file = "greenlet-3.1.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f1d4aeb8891338e60d1ab6127af1fe45def5259def8094b9c7e34690c8858803"}, + {file = "greenlet-3.1.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db32b5348615a04b82240cc67983cb315309e88d444a288934ee6ceaebcad6cc"}, + {file = "greenlet-3.1.1-cp37-cp37m-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dcc62f31eae24de7f8dce72134c8651c58000d3b1868e01392baea7c32c247de"}, + {file = "greenlet-3.1.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:1d3755bcb2e02de341c55b4fca7a745a24a9e7212ac953f6b3a48d117d7257aa"}, + {file = "greenlet-3.1.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:b8da394b34370874b4572676f36acabac172602abf054cbc4ac910219f3340af"}, + {file = "greenlet-3.1.1-cp37-cp37m-win32.whl", hash = "sha256:a0dfc6c143b519113354e780a50381508139b07d2177cb6ad6a08278ec655798"}, + {file = "greenlet-3.1.1-cp37-cp37m-win_amd64.whl", hash = "sha256:54558ea205654b50c438029505def3834e80f0869a70fb15b871c29b4575ddef"}, + {file = "greenlet-3.1.1-cp38-cp38-macosx_11_0_universal2.whl", hash = "sha256:346bed03fe47414091be4ad44786d1bd8bef0c3fcad6ed3dee074a032ab408a9"}, + {file = "greenlet-3.1.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dfc59d69fc48664bc693842bd57acfdd490acafda1ab52c7836e3fc75c90a111"}, + {file = "greenlet-3.1.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d21e10da6ec19b457b82636209cbe2331ff4306b54d06fa04b7c138ba18c8a81"}, + {file = "greenlet-3.1.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:37b9de5a96111fc15418819ab4c4432e4f3c2ede61e660b1e33971eba26ef9ba"}, + {file = "greenlet-3.1.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:6ef9ea3f137e5711f0dbe5f9263e8c009b7069d8a1acea822bd5e9dae0ae49c8"}, + {file = "greenlet-3.1.1-cp38-cp38-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:85f3ff71e2e60bd4b4932a043fbbe0f499e263c628390b285cb599154a3b03b1"}, + {file = "greenlet-3.1.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:95ffcf719966dd7c453f908e208e14cde192e09fde6c7186c8f1896ef778d8cd"}, + {file = "greenlet-3.1.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:03a088b9de532cbfe2ba2034b2b85e82df37874681e8c470d6fb2f8c04d7e4b7"}, + {file = "greenlet-3.1.1-cp38-cp38-win32.whl", hash = "sha256:8b8b36671f10ba80e159378df9c4f15c14098c4fd73a36b9ad715f057272fbef"}, + {file = "greenlet-3.1.1-cp38-cp38-win_amd64.whl", hash = "sha256:7017b2be767b9d43cc31416aba48aab0d2309ee31b4dbf10a1d38fb7972bdf9d"}, + {file = "greenlet-3.1.1-cp39-cp39-macosx_11_0_universal2.whl", hash = "sha256:396979749bd95f018296af156201d6211240e7a23090f50a8d5d18c370084dc3"}, + {file = "greenlet-3.1.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca9d0ff5ad43e785350894d97e13633a66e2b50000e8a183a50a88d834752d42"}, + {file = "greenlet-3.1.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f6ff3b14f2df4c41660a7dec01045a045653998784bf8cfcb5a525bdffffbc8f"}, + {file = "greenlet-3.1.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:94ebba31df2aa506d7b14866fed00ac141a867e63143fe5bca82a8e503b36437"}, + {file = "greenlet-3.1.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:73aaad12ac0ff500f62cebed98d8789198ea0e6f233421059fa68a5aa7220145"}, + {file = "greenlet-3.1.1-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:63e4844797b975b9af3a3fb8f7866ff08775f5426925e1e0bbcfe7932059a12c"}, + {file = "greenlet-3.1.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:7939aa3ca7d2a1593596e7ac6d59391ff30281ef280d8632fa03d81f7c5f955e"}, + {file = 
"greenlet-3.1.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d0028e725ee18175c6e422797c407874da24381ce0690d6b9396c204c7f7276e"}, + {file = "greenlet-3.1.1-cp39-cp39-win32.whl", hash = "sha256:5e06afd14cbaf9e00899fae69b24a32f2196c19de08fcb9f4779dd4f004e5e7c"}, + {file = "greenlet-3.1.1-cp39-cp39-win_amd64.whl", hash = "sha256:3319aa75e0e0639bc15ff54ca327e8dc7a6fe404003496e3c6925cd3142e0e22"}, + {file = "greenlet-3.1.1.tar.gz", hash = "sha256:4ce3ac6cdb6adf7946475d7ef31777c26d94bccc377e070a7986bd2d5c515467"}, +] + +[package.extras] +docs = ["Sphinx", "furo"] +test = ["objgraph", "psutil"] + [[package]] name = "grpcio" version = "1.62.1" @@ -3734,6 +3820,101 @@ files = [ {file = "soupsieve-2.3.2.post1.tar.gz", hash = "sha256:fc53893b3da2c33de295667a0e19f078c14bf86544af307354de5fcf12a3f30d"}, ] +[[package]] +name = "sqlalchemy" +version = "2.0.36" +description = "Database Abstraction Library" +optional = true +python-versions = ">=3.7" +files = [ + {file = "SQLAlchemy-2.0.36-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:59b8f3adb3971929a3e660337f5dacc5942c2cdb760afcabb2614ffbda9f9f72"}, + {file = "SQLAlchemy-2.0.36-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:37350015056a553e442ff672c2d20e6f4b6d0b2495691fa239d8aa18bb3bc908"}, + {file = "SQLAlchemy-2.0.36-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8318f4776c85abc3f40ab185e388bee7a6ea99e7fa3a30686580b209eaa35c08"}, + {file = "SQLAlchemy-2.0.36-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c245b1fbade9c35e5bd3b64270ab49ce990369018289ecfde3f9c318411aaa07"}, + {file = "SQLAlchemy-2.0.36-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:69f93723edbca7342624d09f6704e7126b152eaed3cdbb634cb657a54332a3c5"}, + {file = "SQLAlchemy-2.0.36-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:f9511d8dd4a6e9271d07d150fb2f81874a3c8c95e11ff9af3a2dfc35fe42ee44"}, + {file = "SQLAlchemy-2.0.36-cp310-cp310-win32.whl", hash = 
"sha256:c3f3631693003d8e585d4200730616b78fafd5a01ef8b698f6967da5c605b3fa"}, + {file = "SQLAlchemy-2.0.36-cp310-cp310-win_amd64.whl", hash = "sha256:a86bfab2ef46d63300c0f06936bd6e6c0105faa11d509083ba8f2f9d237fb5b5"}, + {file = "SQLAlchemy-2.0.36-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:fd3a55deef00f689ce931d4d1b23fa9f04c880a48ee97af488fd215cf24e2a6c"}, + {file = "SQLAlchemy-2.0.36-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4f5e9cd989b45b73bd359f693b935364f7e1f79486e29015813c338450aa5a71"}, + {file = "SQLAlchemy-2.0.36-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d0ddd9db6e59c44875211bc4c7953a9f6638b937b0a88ae6d09eb46cced54eff"}, + {file = "SQLAlchemy-2.0.36-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2519f3a5d0517fc159afab1015e54bb81b4406c278749779be57a569d8d1bb0d"}, + {file = "SQLAlchemy-2.0.36-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:59b1ee96617135f6e1d6f275bbe988f419c5178016f3d41d3c0abb0c819f75bb"}, + {file = "SQLAlchemy-2.0.36-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:39769a115f730d683b0eb7b694db9789267bcd027326cccc3125e862eb03bfd8"}, + {file = "SQLAlchemy-2.0.36-cp311-cp311-win32.whl", hash = "sha256:66bffbad8d6271bb1cc2f9a4ea4f86f80fe5e2e3e501a5ae2a3dc6a76e604e6f"}, + {file = "SQLAlchemy-2.0.36-cp311-cp311-win_amd64.whl", hash = "sha256:23623166bfefe1487d81b698c423f8678e80df8b54614c2bf4b4cfcd7c711959"}, + {file = "SQLAlchemy-2.0.36-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f7b64e6ec3f02c35647be6b4851008b26cff592a95ecb13b6788a54ef80bbdd4"}, + {file = "SQLAlchemy-2.0.36-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:46331b00096a6db1fdc052d55b101dbbfc99155a548e20a0e4a8e5e4d1362855"}, + {file = "SQLAlchemy-2.0.36-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fdf3386a801ea5aba17c6410dd1dc8d39cf454ca2565541b5ac42a84e1e28f53"}, + {file = 
"SQLAlchemy-2.0.36-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac9dfa18ff2a67b09b372d5db8743c27966abf0e5344c555d86cc7199f7ad83a"}, + {file = "SQLAlchemy-2.0.36-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:90812a8933df713fdf748b355527e3af257a11e415b613dd794512461eb8a686"}, + {file = "SQLAlchemy-2.0.36-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:1bc330d9d29c7f06f003ab10e1eaced295e87940405afe1b110f2eb93a233588"}, + {file = "SQLAlchemy-2.0.36-cp312-cp312-win32.whl", hash = "sha256:79d2e78abc26d871875b419e1fd3c0bca31a1cb0043277d0d850014599626c2e"}, + {file = "SQLAlchemy-2.0.36-cp312-cp312-win_amd64.whl", hash = "sha256:b544ad1935a8541d177cb402948b94e871067656b3a0b9e91dbec136b06a2ff5"}, + {file = "SQLAlchemy-2.0.36-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b5cc79df7f4bc3d11e4b542596c03826063092611e481fcf1c9dfee3c94355ef"}, + {file = "SQLAlchemy-2.0.36-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3c01117dd36800f2ecaa238c65365b7b16497adc1522bf84906e5710ee9ba0e8"}, + {file = "SQLAlchemy-2.0.36-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9bc633f4ee4b4c46e7adcb3a9b5ec083bf1d9a97c1d3854b92749d935de40b9b"}, + {file = "SQLAlchemy-2.0.36-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e46ed38affdfc95d2c958de328d037d87801cfcbea6d421000859e9789e61c2"}, + {file = "SQLAlchemy-2.0.36-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:b2985c0b06e989c043f1dc09d4fe89e1616aadd35392aea2844f0458a989eacf"}, + {file = "SQLAlchemy-2.0.36-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4a121d62ebe7d26fec9155f83f8be5189ef1405f5973ea4874a26fab9f1e262c"}, + {file = "SQLAlchemy-2.0.36-cp313-cp313-win32.whl", hash = "sha256:0572f4bd6f94752167adfd7c1bed84f4b240ee6203a95e05d1e208d488d0d436"}, + {file = "SQLAlchemy-2.0.36-cp313-cp313-win_amd64.whl", hash = "sha256:8c78ac40bde930c60e0f78b3cd184c580f89456dd87fc08f9e3ee3ce8765ce88"}, + {file = 
"SQLAlchemy-2.0.36-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:be9812b766cad94a25bc63bec11f88c4ad3629a0cec1cd5d4ba48dc23860486b"}, + {file = "SQLAlchemy-2.0.36-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50aae840ebbd6cdd41af1c14590e5741665e5272d2fee999306673a1bb1fdb4d"}, + {file = "SQLAlchemy-2.0.36-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4557e1f11c5f653ebfdd924f3f9d5ebfc718283b0b9beebaa5dd6b77ec290971"}, + {file = "SQLAlchemy-2.0.36-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:07b441f7d03b9a66299ce7ccf3ef2900abc81c0db434f42a5694a37bd73870f2"}, + {file = "SQLAlchemy-2.0.36-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:28120ef39c92c2dd60f2721af9328479516844c6b550b077ca450c7d7dc68575"}, + {file = "SQLAlchemy-2.0.36-cp37-cp37m-win32.whl", hash = "sha256:b81ee3d84803fd42d0b154cb6892ae57ea6b7c55d8359a02379965706c7efe6c"}, + {file = "SQLAlchemy-2.0.36-cp37-cp37m-win_amd64.whl", hash = "sha256:f942a799516184c855e1a32fbc7b29d7e571b52612647866d4ec1c3242578fcb"}, + {file = "SQLAlchemy-2.0.36-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:3d6718667da04294d7df1670d70eeddd414f313738d20a6f1d1f379e3139a545"}, + {file = "SQLAlchemy-2.0.36-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:72c28b84b174ce8af8504ca28ae9347d317f9dba3999e5981a3cd441f3712e24"}, + {file = "SQLAlchemy-2.0.36-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b11d0cfdd2b095e7b0686cf5fabeb9c67fae5b06d265d8180715b8cfa86522e3"}, + {file = "SQLAlchemy-2.0.36-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e32092c47011d113dc01ab3e1d3ce9f006a47223b18422c5c0d150af13a00687"}, + {file = "SQLAlchemy-2.0.36-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:6a440293d802d3011028e14e4226da1434b373cbaf4a4bbb63f845761a708346"}, + {file = "SQLAlchemy-2.0.36-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:c54a1e53a0c308a8e8a7dffb59097bff7facda27c70c286f005327f21b2bd6b1"}, + 
{file = "SQLAlchemy-2.0.36-cp38-cp38-win32.whl", hash = "sha256:1e0d612a17581b6616ff03c8e3d5eff7452f34655c901f75d62bd86449d9750e"}, + {file = "SQLAlchemy-2.0.36-cp38-cp38-win_amd64.whl", hash = "sha256:8958b10490125124463095bbdadda5aa22ec799f91958e410438ad6c97a7b793"}, + {file = "SQLAlchemy-2.0.36-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:dc022184d3e5cacc9579e41805a681187650e170eb2fd70e28b86192a479dcaa"}, + {file = "SQLAlchemy-2.0.36-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b817d41d692bf286abc181f8af476c4fbef3fd05e798777492618378448ee689"}, + {file = "SQLAlchemy-2.0.36-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a4e46a888b54be23d03a89be510f24a7652fe6ff660787b96cd0e57a4ebcb46d"}, + {file = "SQLAlchemy-2.0.36-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c4ae3005ed83f5967f961fd091f2f8c5329161f69ce8480aa8168b2d7fe37f06"}, + {file = "SQLAlchemy-2.0.36-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:03e08af7a5f9386a43919eda9de33ffda16b44eb11f3b313e6822243770e9763"}, + {file = "SQLAlchemy-2.0.36-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:3dbb986bad3ed5ceaf090200eba750b5245150bd97d3e67343a3cfed06feecf7"}, + {file = "SQLAlchemy-2.0.36-cp39-cp39-win32.whl", hash = "sha256:9fe53b404f24789b5ea9003fc25b9a3988feddebd7e7b369c8fac27ad6f52f28"}, + {file = "SQLAlchemy-2.0.36-cp39-cp39-win_amd64.whl", hash = "sha256:af148a33ff0349f53512a049c6406923e4e02bf2f26c5fb285f143faf4f0e46a"}, + {file = "SQLAlchemy-2.0.36-py3-none-any.whl", hash = "sha256:fddbe92b4760c6f5d48162aef14824add991aeda8ddadb3c31d56eb15ca69f8e"}, + {file = "sqlalchemy-2.0.36.tar.gz", hash = "sha256:7f2767680b6d2398aea7082e45a774b2b0767b5c8d8ffb9c8b683088ea9b29c5"}, +] + +[package.dependencies] +greenlet = {version = "!=0.4.17", markers = "python_version < \"3.13\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == 
\"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\")"} +typing-extensions = ">=4.6.0" + +[package.extras] +aiomysql = ["aiomysql (>=0.2.0)", "greenlet (!=0.4.17)"] +aioodbc = ["aioodbc", "greenlet (!=0.4.17)"] +aiosqlite = ["aiosqlite", "greenlet (!=0.4.17)", "typing_extensions (!=3.10.0.1)"] +asyncio = ["greenlet (!=0.4.17)"] +asyncmy = ["asyncmy (>=0.2.3,!=0.2.4,!=0.2.6)", "greenlet (!=0.4.17)"] +mariadb-connector = ["mariadb (>=1.0.1,!=1.1.2,!=1.1.5,!=1.1.10)"] +mssql = ["pyodbc"] +mssql-pymssql = ["pymssql"] +mssql-pyodbc = ["pyodbc"] +mypy = ["mypy (>=0.910)"] +mysql = ["mysqlclient (>=1.4.0)"] +mysql-connector = ["mysql-connector-python"] +oracle = ["cx_oracle (>=8)"] +oracle-oracledb = ["oracledb (>=1.0.1)"] +postgresql = ["psycopg2 (>=2.7)"] +postgresql-asyncpg = ["asyncpg", "greenlet (!=0.4.17)"] +postgresql-pg8000 = ["pg8000 (>=1.29.1)"] +postgresql-psycopg = ["psycopg (>=3.0.7)"] +postgresql-psycopg2binary = ["psycopg2-binary"] +postgresql-psycopg2cffi = ["psycopg2cffi"] +postgresql-psycopgbinary = ["psycopg[binary] (>=3.0.7)"] +pymysql = ["pymysql"] +sqlcipher = ["sqlcipher3_binary"] + [[package]] name = "tctc-odata" version = "0.3" @@ -4285,35 +4466,35 @@ test = ["coverage[toml] (==7.6.2)", "flake8 (==7.1.1)", "flake8-blind-except (== xmlsec = ["xmlsec (>=0.6.1)"] [extras] -all = ["Authlib", "PyJWT", "PyMySQL", "awswrangler", "clickhouse-driver", "cx-Oracle", "dataiku-api-client", "elasticsearch", "google-api-python-client", "google-cloud-bigquery", "hubspot-api-client", "jq", "lxml", "oauth2client", "oauthlib", "pandas", "pandas", "peakina", "psycopg2", "pyarrow", "pyhdb", "pymongo", "pyodbc", "python-graphql-client", "python-slugify", "redshift-connector", "requests-oauthlib", "simplejson", "snowflake-connector-python", "tctc-odata", "tenacity", "xmltodict", "zeep"] +all = ["Authlib", "PyJWT", "PyMySQL", "awswrangler", "clickhouse-driver", "cx-Oracle", "dataiku-api-client", "elasticsearch", "google-api-python-client", 
"google-cloud-bigquery", "hubspot-api-client", "jq", "lxml", "oauth2client", "oauthlib", "pandas", "pandas", "peakina", "psycopg2", "pyarrow", "pyhdb", "pymongo", "pyodbc", "python-graphql-client", "python-slugify", "redshift-connector", "requests-oauthlib", "simplejson", "snowflake-connector-python", "sqlalchemy", "tctc-odata", "tenacity", "xmltodict", "zeep"] awsathena = ["awswrangler", "pandas", "pandas"] -azure-mssql = ["pandas", "pandas", "pyodbc"] +azure-mssql = ["pandas", "pandas", "pyodbc", "sqlalchemy"] base = ["Authlib", "jq", "pandas", "pandas", "pandas", "pandas", "python-slugify", "tenacity"] -clickhouse = ["clickhouse-driver", "pandas", "pandas"] +clickhouse = ["clickhouse-driver", "pandas", "pandas", "sqlalchemy"] dataiku = ["dataiku-api-client", "pandas", "pandas"] elasticsearch = ["elasticsearch", "pandas", "pandas"] github = ["pandas", "pandas", "python-graphql-client"] google-analytics = ["google-api-python-client", "oauth2client", "pandas", "pandas"] google-big-query = ["google-cloud-bigquery", "pandas", "pandas"] -google-cloud-mysql = ["PyMySQL", "pandas", "pandas"] +google-cloud-mysql = ["PyMySQL", "pandas", "pandas", "sqlalchemy"] google-sheets = ["google-api-python-client", "pandas", "pandas"] http-api = ["oauthlib", "pandas", "pandas", "requests-oauthlib", "xmltodict"] hubspot = ["hubspot-api-client", "pandas", "pandas"] mongo = ["pandas", "pandas", "pymongo"] -mssql = ["pandas", "pandas", "pyodbc"] +mssql = ["pandas", "pandas", "pyodbc", "sqlalchemy"] mssql-tlsv1-0 = ["pandas", "pandas", "pyodbc"] -mysql = ["PyMySQL", "pandas", "pandas"] +mysql = ["PyMySQL", "pandas", "pandas", "sqlalchemy"] odata = ["oauthlib", "pandas", "pandas", "requests-oauthlib", "tctc-odata"] odbc = ["pandas", "pandas"] -oracle-sql = ["cx-Oracle", "pandas", "pandas"] +oracle-sql = ["cx-Oracle", "pandas", "pandas", "sqlalchemy"] peakina = ["pandas", "pandas", "peakina"] -postgres = ["pandas", "pandas", "psycopg2"] +postgres = ["pandas", "pandas", "psycopg2", 
"sqlalchemy"] redshift = ["lxml", "pandas", "pandas", "redshift-connector"] -sap-hana = ["pandas", "pandas", "pyhdb"] +sap-hana = ["pandas", "pandas", "pyhdb", "sqlalchemy"] snowflake = ["PyJWT", "pandas", "pandas", "pyarrow", "snowflake-connector-python"] soap = ["lxml", "pandas", "pandas", "zeep"] [metadata] lock-version = "2.0" python-versions = ">=3.11,<3.13" -content-hash = "8e5166deba64593dd13c76ad7afbb19d3932a36f92a7a4523022ea4a7169ed04" +content-hash = "525fb15be21b02f048a8077b8b372bd49a40460dfd6c7d8bd11a17790c6c5105" diff --git a/pyproject.toml b/pyproject.toml index 3e81a4448..d66b5b081 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,6 +49,7 @@ redshift-connector = { version = "^2.0.907", optional = true } requests-oauthlib = { version = "2.0.0", optional = true } simplejson = { version = "^3.17.6", optional = true } snowflake-connector-python = { version = ">=2.7.12,<4.0.0", optional = true } +sqlalchemy = { version = "^2", optional = true } tctc-odata = { version = ">=0.3,<1.0", optional = true } tenacity = { version = ">=8.0.1,<10.0.0", optional = true } xmltodict = { version = ">=0.13.0,<1.0", optional = true } @@ -78,36 +79,36 @@ types-pymysql = "^1.1.0.20241103" [tool.poetry.extras] awsathena = ["awswrangler", "pandas"] -azure_mssql = ["pyodbc", "pandas"] +azure_mssql = ["pyodbc", "pandas", "sqlalchemy"] base = ["Authlib", "jq", "pandas", "python-slugify", "tenacity", "pandas"] -clickhouse = ["clickhouse-driver", "pandas"] +clickhouse = ["clickhouse-driver", "pandas", "sqlalchemy"] dataiku = ["dataiku-api-client", "pandas"] elasticsearch = ["elasticsearch", "pandas"] github = ["python-graphql-client", "pandas"] google_analytics = ["google-api-python-client", "oauth2client", "pandas"] google_big_query = ["google-cloud-bigquery", "pandas"] -google_cloud_mysql = ["PyMySQL", "pandas"] +google_cloud_mysql = ["PyMySQL", "pandas", "sqlalchemy"] google_sheets = ["google-api-python-client", "pandas"] http_api = ["oauthlib", "requests-oauthlib", 
"xmltodict", "pandas"] hubspot = ["hubspot-api-client", "pandas"] mongo = ["pymongo", "pandas"] -mssql = ["pyodbc", "pandas"] +mssql = ["pyodbc", "pandas", "sqlalchemy"] mssql_TLSv1_0 = ["pyodbc", "pandas"] -mysql = ["PyMySQL", "pandas"] +mysql = ["PyMySQL", "pandas", "sqlalchemy"] odata = ["oauthlib", "requests-oauthlib", "tctc_odata", "pandas"] odbc = ["pydobc", "pandas"] -oracle_sql = ["cx_Oracle", "pandas"] +oracle_sql = ["cx_Oracle", "pandas", "sqlalchemy"] peakina = ["peakina", "pandas"] -postgres = ["psycopg2", "pandas"] +postgres = ["psycopg2", "pandas", "sqlalchemy"] Redshift = ["redshift_connector", "lxml", "pandas"] -sap_hana = ["pyhdb", "pandas"] +sap_hana = ["pyhdb", "pandas", "sqlalchemy"] snowflake = ["snowflake-connector-python", "PyJWT", "pyarrow", "pandas"] soap = ["zeep", "lxml", "pandas"] # All all = [ - "awswrangler", "Authlib", + "awswrangler", "clickhouse-driver", "cx-Oracle", "dataiku-api-client", @@ -120,12 +121,12 @@ all = [ "oauth2client", "oauthlib", "openpyxl", + "pandas", "peakina", "psycopg2", "pyarrow", "pyhdb", "PyJWT", - "pandas", "pymongo", "PyMySQL", "pyodbc", @@ -135,8 +136,9 @@ all = [ "requests-oauthlib", "simplejson", "snowflake-connector-python", - "tenacity", + "sqlalchemy", "tctc-odata", + "tenacity", "xmltodict", "zeep", ] @@ -162,15 +164,17 @@ ignore_missing_imports = true files = [ "toucan_connectors/auth.py", "toucan_connectors/awsathena/awsathena_connector.py", + "toucan_connectors/azure_mssql/azure_mssql_connector.py", + "toucan_connectors/common.py", "toucan_connectors/google_big_query/google_big_query_connector.py", "toucan_connectors/hubspot_private_app/hubspot_connector.py", "toucan_connectors/mongo/mongo_connector.py", + "toucan_connectors/mysql/mysql_connector.py", "toucan_connectors/peakina/peakina_connector.py", "toucan_connectors/postgres/postgresql_connector.py", - "toucan_connectors/mysql/mysql_connector.py", + "toucan_connectors/redshift/redshift_database_connector.py", 
"toucan_connectors/snowflake/snowflake_connector.py", "toucan_connectors/snowflake_oauth2/snowflake_oauth2_connector.py", - "toucan_connectors/redshift/redshift_database_connector.py", "toucan_connectors/toucan_connector.py", ] diff --git a/tests/azure_mssql/fixtures/world.sql b/tests/azure_mssql/fixtures/world.sql new file mode 100644 index 000000000..8de432279 --- /dev/null +++ b/tests/azure_mssql/fixtures/world.sql @@ -0,0 +1,59 @@ +DROP TABLE IF EXISTS City; +CREATE TABLE City ( + ID int, + Name varchar(35) NOT NULL DEFAULT '', + CountryCode char(3) NOT NULL DEFAULT '', + District varchar(20) NOT NULL DEFAULT '', + Population int NOT NULL DEFAULT '0' +) + +INSERT INTO City VALUES (1,'Kabul','AFG','Kabol',1780000); +INSERT INTO City VALUES (2,'Qandahar','AFG','Qandahar',237500); +INSERT INTO City VALUES (3,'Herat','AFG','Herat',186800); +INSERT INTO City VALUES (4,'Mazar-e-Sharif','AFG','Balkh',127800); +INSERT INTO City VALUES (5,'Amsterdam','NLD','Noord-Holland',731200); +INSERT INTO City VALUES (6,'Rotterdam','NLD','Zuid-Holland',593321); +INSERT INTO City VALUES (7,'Haag','NLD','Zuid-Holland',440900); +INSERT INTO City VALUES (8,'Utrecht','NLD','Utrecht',234323); +INSERT INTO City VALUES (9,'Eindhoven','NLD','Noord-Brabant',201843); +INSERT INTO City VALUES (10,'Tilburg','NLD','Noord-Brabant',193238); +INSERT INTO City VALUES (11,'Groningen','NLD','Groningen',172701); +INSERT INTO City VALUES (12,'Breda','NLD','Noord-Brabant',160398); +INSERT INTO City VALUES (13,'Apeldoorn','NLD','Gelderland',153491); +INSERT INTO City VALUES (14,'Nijmegen','NLD','Gelderland',152463); +INSERT INTO City VALUES (15,'Enschede','NLD','Overijssel',149544); +INSERT INTO City VALUES (16,'Haarlem','NLD','Noord-Holland',148772); +INSERT INTO City VALUES (17,'Almere','NLD','Flevoland',142465); +INSERT INTO City VALUES (18,'Arnhem','NLD','Gelderland',138020); +INSERT INTO City VALUES (19,'Zaanstad','NLD','Noord-Holland',135621); +INSERT INTO City VALUES 
(20,'´s-Hertogenbosch','NLD','Noord-Brabant',129170); +INSERT INTO City VALUES (21,'Amersfoort','NLD','Utrecht',126270); +INSERT INTO City VALUES (22,'Maastricht','NLD','Limburg',122087); +INSERT INTO City VALUES (23,'Dordrecht','NLD','Zuid-Holland',119811); +INSERT INTO City VALUES (24,'Leiden','NLD','Zuid-Holland',117196); +INSERT INTO City VALUES (25,'Haarlemmermeer','NLD','Noord-Holland',110722); +INSERT INTO City VALUES (26,'Zoetermeer','NLD','Zuid-Holland',110214); +INSERT INTO City VALUES (27,'Emmen','NLD','Drenthe',105853); +INSERT INTO City VALUES (28,'Zwolle','NLD','Overijssel',105819); +INSERT INTO City VALUES (29,'Ede','NLD','Gelderland',101574); +INSERT INTO City VALUES (30,'Delft','NLD','Zuid-Holland',95268); +INSERT INTO City VALUES (31,'Heerlen','NLD','Limburg',95052); +INSERT INTO City VALUES (32,'Alkmaar','NLD','Noord-Holland',92713); +INSERT INTO City VALUES (33,'Willemstad','ANT','Curaçao',2345); +INSERT INTO City VALUES (34,'Tirana','ALB','Tirana',270000); +INSERT INTO City VALUES (35,'Alger','DZA','Alger',2168000); +INSERT INTO City VALUES (36,'Oran','DZA','Oran',609823); +INSERT INTO City VALUES (37,'Constantine','DZA','Constantine',443727); +INSERT INTO City VALUES (38,'Annaba','DZA','Annaba',222518); +INSERT INTO City VALUES (39,'Batna','DZA','Batna',183377); +INSERT INTO City VALUES (40,'Sétif','DZA','Sétif',179055); +INSERT INTO City VALUES (41,'Sidi Bel Abbès','DZA','Sidi Bel Abbès',153106); +INSERT INTO City VALUES (42,'Skikda','DZA','Skikda',128747); +INSERT INTO City VALUES (43,'Biskra','DZA','Biskra',128281); +INSERT INTO City VALUES (44,'Blida (el-Boulaida)','DZA','Blida',127284); +INSERT INTO City VALUES (45,'Béjaïa','DZA','Béjaïa',117162); +INSERT INTO City VALUES (46,'Mostaganem','DZA','Mostaganem',115212); +INSERT INTO City VALUES (47,'Tébessa','DZA','Tébessa',112007); +INSERT INTO City VALUES (48,'Tlemcen (Tilimsen)','DZA','Tlemcen',110242); +INSERT INTO City VALUES (49,'Béchar','DZA','Béchar',107311); +INSERT INTO City VALUES
(50,'Tiaret','DZA','Tiaret',100118); diff --git a/tests/azure_mssql/test_azure_mssql.py b/tests/azure_mssql/test_azure_mssql.py index edc846e97..ff545c9b1 100644 --- a/tests/azure_mssql/test_azure_mssql.py +++ b/tests/azure_mssql/test_azure_mssql.py @@ -1,39 +1,51 @@ +from os import environ + +import pandas as pd +import pytest +from pandas.testing import assert_frame_equal + from toucan_connectors.azure_mssql.azure_mssql_connector import ( AzureMSSQLConnector, AzureMSSQLDataSource, ) -def test_connection_params(): - connector = AzureMSSQLConnector(host="my_host", user="my_user", name="") - params = connector.get_connection_params() - assert params["server"] == "my_host.database.windows.net" - assert params["user"] == "my_user@my_host" - - connector = AzureMSSQLConnector(host="my_host.database.windows.net", user="my_user", password="", name="") - params = connector.get_connection_params() - assert params["server"] == "my_host.database.windows.net" - assert params["user"] == "my_user@my_host" - - connector = AzureMSSQLConnector(host="my_host.database.windows.net", user="my_user@my_host", password="", name="") - params = connector.get_connection_params() - assert params["server"] == "my_host.database.windows.net" - assert params["user"] == "my_user@my_host" - - -def test_gcmysql_get_df(mocker): - snock = mocker.patch("pyodbc.connect") - reasq = mocker.patch("pandas.read_sql") - - mssql_connector = AzureMSSQLConnector(name="test", host="localhost", user="ubuntu", password="ilovetoucan") - ds = AzureMSSQLDataSource(domain="test", name="test", database="mssql_db", query="my_query") - mssql_connector.get_df(ds) - - snock.assert_called_once_with( - server="localhost.database.windows.net", - user="ubuntu@localhost", - database="mssql_db", - password="ilovetoucan", - driver="{ODBC Driver 17 for SQL Server}", +@pytest.fixture +def connector() -> AzureMSSQLConnector: + user = environ["AZURE_MSSQL_USER"] + password = environ["AZURE_MSSQL_PASSWORD"] + host = 
environ["AZURE_MSSQL_HOST"] + return AzureMSSQLConnector(name="azure-mssql-ci", user=user, host=host, password=password, connect_timeout=3) + + +@pytest.fixture +def datasource() -> AzureMSSQLDataSource: + database = environ["AZURE_MSSQL_DATABASE"] + return AzureMSSQLDataSource(domain="azure-mssql-ci", name="Azure MSSQL CI", database=database, query="SELECT 1;") + + +def test_azure_get_df_simple(connector: AzureMSSQLConnector, datasource: AzureMSSQLDataSource) -> None: + datasource.query = "SELECT name, population FROM City WHERE name = 'Maastricht';" + df = connector.get_df(datasource) + expected = pd.DataFrame({"name": ["Maastricht"], "population": [122_087]}) + assert_frame_equal(df, expected) + + +def test_azure_get_df_with_parameters_and_modulo( + connector: AzureMSSQLConnector, datasource: AzureMSSQLDataSource +) -> None: + datasource.query = "SELECT * FROM City WHERE CountryCode = {{ Code }} AND Population % 1000 >= 700" + datasource.parameters = {"Code": "AFG"} + df = connector.get_df(datasource) + assert_frame_equal( + df, + pd.DataFrame( + { + "ID": [3, 4], + "Name": ["Herat", "Mazar-e-Sharif"], + "CountryCode": ["AFG", "AFG"], + "District": ["Herat", "Balkh"], + "Population": [186_800, 127_800], + } + ), ) - reasq.assert_called_once_with("my_query", con=snock(), params=[]) diff --git a/tests/test_common.py b/tests/test_common.py index 2535a6fab..2ec7ee4aa 100644 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -14,6 +14,7 @@ UndefinedVariableError, adapt_param_type, apply_query_parameters, + convert_jinja_params_to_sqlalchemy_named, convert_to_numeric_paramstyle, convert_to_printf_templating_style, convert_to_qmark_paramstyle, @@ -563,3 +564,18 @@ def test_convert_pyformat_to_numeric(query, params, expected_query, expected_ord converted_query, ordered_values = convert_to_numeric_paramstyle(query, params) assert ordered_values == expected_ordered_values assert converted_query == expected_query + + +@pytest.mark.parametrize( + "query,expected", + [ 
+ ("SELECT * FROM my_table;", "SELECT * FROM my_table;"), + ( + "SELECT name, population FROM City WHERE name SIMILAR TO '%aastri%' AND population >= {{min_pop}}", + "SELECT name, population FROM City WHERE name SIMILAR TO '%aastri%' AND population >= :min_pop", + ), + ], +) +def test_convert_jinja_params_to_sqlalchemy_named(query: str, expected: str) -> None: + result = convert_jinja_params_to_sqlalchemy_named(query) + assert result == expected diff --git a/toucan_connectors/azure_mssql/azure_mssql_connector.py b/toucan_connectors/azure_mssql/azure_mssql_connector.py index c3ddcbf23..3a51846e4 100644 --- a/toucan_connectors/azure_mssql/azure_mssql_connector.py +++ b/toucan_connectors/azure_mssql/azure_mssql_connector.py @@ -3,27 +3,33 @@ try: import pandas as pd - import pyodbc CONNECTOR_OK = True except ImportError as exc: # pragma: no cover getLogger(__name__).warning(f"Missing dependencies for {__name__}: {exc}") CONNECTOR_OK = False -from typing import Annotated +from typing import TYPE_CHECKING, Annotated -from pydantic import Field, StringConstraints +from pydantic import Field, SecretStr, StringConstraints -from toucan_connectors.common import pandas_read_sql +from toucan_connectors.common import ( + convert_jinja_params_to_sqlalchemy_named, + create_sqlalchemy_engine, + pandas_read_sqlalchemy_query, +) from toucan_connectors.toucan_connector import PlainJsonSecretStr, ToucanConnector, ToucanDataSource +if TYPE_CHECKING: + import sqlalchemy as sa + CLOUD_HOST = "database.windows.net" class AzureMSSQLDataSource(ToucanDataSource): database: str = Field(..., description="The name of the database you want to query") query: Annotated[str, StringConstraints(min_length=1)] = Field( - ..., description="You can write your SQL query here", widget="sql" + ..., description="You can write your SQL query here", json_schema_extra={"widget": "sql"} ) @@ -39,43 +45,46 @@ class AzureMSSQLConnector(ToucanConnector, data_source_model=AzureMSSQLDataSourc ) user: str = 
Field(..., description="Your login username") - password: PlainJsonSecretStr = Field("", description="Your login password") - connect_timeout: int = Field( + password: PlainJsonSecretStr = Field(SecretStr(""), description="Your login password") + connect_timeout: int | None = Field( None, title="Connection timeout", description="You can set a connection timeout in seconds here, i.e. the maximum length of " "time you want to wait for the server to respond. None by default", ) - def get_connection_params(self, *, database=None): + def _create_engine(self, database: str | None) -> "sa.Engine": + from sqlalchemy.engine import URL + base_host = re.sub(f".{CLOUD_HOST}$", "", self.host) + host = f"{base_host}.{CLOUD_HOST}" user = f"{self.user}@{base_host}" if "@" not in self.user else self.user - if not self.password: - self.password = PlainJsonSecretStr("") + password = self.password.get_secret_value() if self.password else None - con_params = { - "driver": "{ODBC Driver 17 for SQL Server}", - "server": f"{base_host}.{CLOUD_HOST}", - "database": database, - "user": user, - "password": self.password.get_secret_value(), - "timeout": self.connect_timeout, + query_params: dict[str, str] = { + "driver": "ODBC Driver 17 for SQL Server", } - # remove None values - return {k: v for k, v in con_params.items() if v is not None} + if self.connect_timeout: + query_params["timeout"] = str(self.connect_timeout) + + connection_url = URL.create( + "mssql+pyodbc", + username=user, + password=password, + host=host, + database=database, + query=query_params, + ) + return create_sqlalchemy_engine(connection_url) def _retrieve_data(self, datasource: AzureMSSQLDataSource) -> "pd.DataFrame": - connection = pyodbc.connect(**self.get_connection_params(database=datasource.database)) + sa_engine = self._create_engine(database=datasource.database) query_params = datasource.parameters or {} - df = pandas_read_sql( - datasource.query, - con=connection, - params=query_params, - convert_to_qmark=True, 
- render_user=True, - ) + # {{param}} -> :param + query = convert_jinja_params_to_sqlalchemy_named(datasource.query) + + df = pandas_read_sqlalchemy_query(query=query, engine=sa_engine, params=query_params) - connection.close() return df diff --git a/toucan_connectors/common.py b/toucan_connectors/common.py index d27173368..6a70b0e91 100644 --- a/toucan_connectors/common.py +++ b/toucan_connectors/common.py @@ -16,6 +16,7 @@ if TYPE_CHECKING: # pragma: no cover import pandas as pd + import sqlalchemy as sa # Query interpolation @@ -62,12 +63,12 @@ def is_jinja_alone(s: str) -> bool: return False -def _has_parameters(query: dict | list[dict] | tuple | str) -> bool: +def _has_parameters(query: str) -> bool: t = Environment().parse(query) # noqa: S701 return bool(meta.find_undeclared_variables(t) or re.search(RE_PARAM, query)) -def _prepare_parameters(p: dict | list[dict] | tuple | str) -> dict | list[dict] | tuple | str: +def _prepare_parameters(p: dict | list[dict] | tuple | str) -> dict | list[Any] | tuple | str: if isinstance(p, str): return repr(p) elif isinstance(p, list): @@ -78,7 +79,7 @@ def _prepare_parameters(p: dict | list[dict] | tuple | str) -> dict | list[dict] return p -def _prepare_result(res: dict | list[dict] | tuple | str) -> dict | list[dict] | tuple | str: +def _prepare_result(res: dict | list[dict] | tuple | str) -> dict | list[Any] | tuple | str: if isinstance(res, str): return ast.literal_eval(res) elif isinstance(res, list): @@ -163,8 +164,8 @@ def _render_query(query: dict | list[dict] | tuple | str, parameters: dict | Non clean_p = deepcopy(parameters) if is_jinja_alone(query): - clean_p = _prepare_parameters(clean_p) - env = NativeEnvironment() + clean_p = _prepare_parameters(clean_p) # type:ignore[assignment] + env: Environment | NativeEnvironment = NativeEnvironment() else: env = Environment() # noqa: S701 @@ -242,7 +243,7 @@ def _flatten_dict(p, parent_key=""): # jq filtering -def transform_with_jq(data: object, jq_filter: str) -> list: 
+def transform_with_jq(data: Any, jq_filter: str) -> list: import jq data = jq.all(jq_filter, data) @@ -316,6 +317,16 @@ def get_param_name(printf_style_argument: str) -> str: return printf_style_argument[2:-2] +def convert_jinja_params_to_sqlalchemy_named(query: str) -> str: + """Converts jinja params to SQLAlchemy named parameters. + + Naively transforms '{{ foo }}' to :foo using regex substitution. + + Note that the resulting query should not be used directly, but wrapped with `sqlalchemy.text` + """ + return re.sub(RE_SINGLE_VAR_JINJA, r":\g<1>", query) + + def convert_to_qmark_paramstyle(query_string: str, params_values: dict) -> tuple[str, list[Any]]: """Takes a query in pyformat paramstyle and transforms it in qmark by replacing placeholders by ? and returning values in right order @@ -333,7 +344,7 @@ def convert_to_qmark_paramstyle(query_string: str, params_values: dict) -> tuple if isinstance(o, list): # in the query string, replace the ? at index i by the number of item # in the provided parameter of type list - query_string = query_string.replace(extracted_params[i], f'({",".join(len(ordered_values[i])*["?"])})') + query_string = query_string.replace(extracted_params[i], f'({",".join(len(o)*["?"])})') flattened_values = [] for val in ordered_values: @@ -366,7 +377,7 @@ def convert_to_numeric_paramstyle(query_string: str, params_values: dict) -> tup # query_string = "SELECT name FROM students WHERE age IN %(allowed_ages)" # allowed_ages = [16, 17, 18] # transformed query_string = "SELECT name FROM students WHERE age IN (:1,:2,:3)" - list_size = len(ordered_values[i]) + list_size = len(o) variable_list = f'({",".join([f":{variable_idx + n}" for n in range(list_size)])})' query_string = query_string.replace(extracted_params[i], variable_list) variable_idx += list_size @@ -382,7 +393,8 @@ def convert_to_numeric_paramstyle(query_string: str, params_values: dict) -> tup else: flattened_values.append(val) - return query_string, flattened_values + # NOTE: we 
should probably return tuple(flattened_values) here but it could be breaking + return query_string, flattened_values # type:ignore[return-value] def convert_to_printf_templating_style(query_string: str) -> str: @@ -402,15 +414,14 @@ def adapt_param_type(params): return {k: (tuple(v) if isinstance(v, list) else v) for (k, v) in params.items()} -def extract_table_name(query: str) -> str: +def extract_table_name(query: str) -> str | None: m = re.search(r"from\s*(?P<table>[^\s;]+)\s*(where|order by|group by|limit)?", query, re.I) - table = m.group("table") - return table + return m.group("table") if m else None def is_interpolating_table_name(query: str) -> bool: table_name = extract_table_name(query) - return table_name.startswith("%(") + return bool(table_name and table_name.startswith("%(")) def infer_datetime_dtype(df: "pd.DataFrame") -> None: @@ -445,8 +456,8 @@ def rename_duplicate_columns(df: "pd.DataFrame") -> None: cols = pd.Series(df.columns) for dup in df.columns[df.columns.duplicated(keep=False)]: - cols[df.columns.get_loc(dup)] = [f"{dup}_{d_idx}" for d_idx in range(df.columns.get_loc(dup).sum())] - df.columns = cols + cols[df.columns.get_loc(dup)] = [f"{dup}_{d_idx}" for d_idx in range(df.columns.get_loc(dup).sum())] # type:ignore[union-attr] + df.columns = cols # type:ignore[assignment] def pandas_read_sql( @@ -480,10 +491,43 @@ def pandas_read_sql( query = query.replace("%%", "%") query = re.sub(r"%[^(%]", r"%\g<0>", query) df = pd.read_sql(query, con=con, params=params, **kwargs) - except pd.io.sql.DatabaseError as exc: + except pd.errors.DatabaseError as exc: + if is_interpolating_table_name(query): + errmsg = f"Execution failed on sql '{query}': interpolating table name is forbidden" + raise pd.errors.DatabaseError(errmsg) from exc + else: + raise + + rename_duplicate_columns(df) + infer_datetime_dtype(df) + return df + + +def create_sqlalchemy_engine(url: "sa.URL") -> "sa.Engine": + """Creates an SQLAlchemy engine for the given URL.
+ + Sets sensible connector-specific defaults, such as disabling connection pooling. + """ + import sqlalchemy as sa + + return sa.create_engine(url, poolclass=sa.NullPool) + + +def pandas_read_sqlalchemy_query( + *, query: str, engine: "sa.Engine", params: dict[str, Any] | None = None +) -> "pd.DataFrame": + import pandas as pd + import sqlalchemy as sa + + sa_query = sa.text(query) + + try: + conn = engine.connect() + df = pd.read_sql_query(sa_query, conn, params=params) + except (pd.errors.DatabaseError, sa.exc.SQLAlchemyError) as exc: if is_interpolating_table_name(query): errmsg = f"Execution failed on sql '{query}': interpolating table name is forbidden" - raise pd.io.sql.DatabaseError(errmsg) from exc + raise pd.errors.DatabaseError(errmsg) from exc else: raise diff --git a/toucan_connectors/mongo/mongo_connector.py b/toucan_connectors/mongo/mongo_connector.py index e2d711f32..6956ce3c2 100644 --- a/toucan_connectors/mongo/mongo_connector.py +++ b/toucan_connectors/mongo/mongo_connector.py @@ -197,7 +197,7 @@ def _get_details(index: int, status: Optional[bool]): def _get_mongo_client_kwargs(self) -> dict[str, Any]: # We don't want parent class attributes nor the `client` property # nor attributes with `None` value - to_exclude = set(ToucanConnector.__pydantic_fields__.keys()) | {"client", "max_pool_size"} + to_exclude = set(ToucanConnector.model_fields.keys()) | {"client", "max_pool_size"} mongo_client_kwargs = self.model_dump(exclude=to_exclude, exclude_none=True).copy() if "password" in mongo_client_kwargs: