diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index 640905f51..e33ee59b8 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -2,66 +2,66 @@ name: 🐛 Bug Report description: Create a report to help us reproduce and fix the bug body: -- type: markdown - attributes: - value: > - #### Before submitting a bug, please make sure the issue hasn't been already addressed by searching through [the existing and past issues](https://github.com/gventuri/pandas-ai/issues?q=is%3Aissue+sort%3Acreated-desc+). -- type: textarea - id: system-info - attributes: - label: System Info - description: | - Please share your system info with us. - OS version: - Python version: - The current version of `pandasai` being used: + - type: markdown + attributes: + value: > + #### Before submitting a bug, please make sure the issue hasn't been already addressed by searching through [the existing and past issues](https://github.com/gventuri/pandas-ai/issues?q=is%3Aissue+sort%3Acreated-desc+). + - type: textarea + id: system-info + attributes: + label: System Info + description: | + Please share your system info with us. + OS version: + Python version: + The current version of `pandasai` being used: - placeholder: pandasai version, platform, python version, ... - validations: - required: true + placeholder: pandasai version, platform, python version, ... + validations: + required: true -- type: textarea - attributes: - label: 🐛 Describe the bug - description: | - Please provide a clear and concise description of what the bug is. + - type: textarea + attributes: + label: 🐛 Describe the bug + description: | + Please provide a clear and concise description of what the bug is. - If relevant, add a minimal example so that we can reproduce the error by running the code. 
It is very important for the snippet to be as succinct (minimal) as possible, so please take time to trim down any irrelevant code to help us debug efficiently. We are going to copy-paste your code and we expect to get the same result as you did: avoid any external data, and include the relevant imports, etc. For example: + If relevant, add a minimal example so that we can reproduce the error by running the code. It is very important for the snippet to be as succinct (minimal) as possible, so please take time to trim down any irrelevant code to help us debug efficiently. We are going to copy-paste your code and we expect to get the same result as you did: avoid any external data, and include the relevant imports, etc. For example: - ```python - # All necessary imports at the beginning - import pandas as pd - from pandasai import SmartDataframe - - # Sample DataFrame - df = pd.DataFrame({ - "country": ["United States", "United Kingdom", "France", "Germany", "Italy", "Spain", "Canada", "Australia", "Japan", "China"], - "gdp": [19294482071552, 2891615567872, 2411255037952, 3435817336832, 1745433788416, 1181205135360, 1607402389504, 1490967855104, 4380756541440, 14631844184064], - "happiness_index": [6.94, 7.16, 6.66, 7.07, 6.38, 6.4, 7.23, 7.22, 5.87, 5.12] - }) - - # Instantiate a LLM - from pandasai.llm import OpenAI - llm = OpenAI(api_token="YOUR_API_TOKEN") - - df = SmartDataframe(df, config={"llm": llm}) - df.chat('Which are the 5 happiest countries?') - ``` + ```python + # All necessary imports at the beginning + import pandas as pd + from pandasai import Agent - Please also paste or describe the results you observe instead of the expected results. If you observe an error, please paste the error message including the **full** traceback of the exception. It may be relevant to wrap error messages in ```` ```triple quotes blocks``` ````. - placeholder: | - A clear and concise description of what the bug is. 
+ # Sample DataFrame + df = pd.DataFrame({ + "country": ["United States", "United Kingdom", "France", "Germany", "Italy", "Spain", "Canada", "Australia", "Japan", "China"], + "gdp": [19294482071552, 2891615567872, 2411255037952, 3435817336832, 1745433788416, 1181205135360, 1607402389504, 1490967855104, 4380756541440, 14631844184064], + "happiness_index": [6.94, 7.16, 6.66, 7.07, 6.38, 6.4, 7.23, 7.22, 5.87, 5.12] + }) - ```python - Sample code to reproduce the problem - ``` + # Instantiate a LLM + from pandasai.llm import OpenAI + llm = OpenAI(api_token="YOUR_API_TOKEN") - ``` - The error message you got, with the full traceback. - ```` - validations: - required: true -- type: markdown - attributes: - value: > - Thanks for contributing 🎉! + df = Agent([df], config={"llm": llm}) + df.chat('Which are the 5 happiest countries?') + ``` + + Please also paste or describe the results you observe instead of the expected results. If you observe an error, please paste the error message including the **full** traceback of the exception. It may be relevant to wrap error messages in ```` ```triple quotes blocks``` ````. + placeholder: | + A clear and concise description of what the bug is. + + ```python + Sample code to reproduce the problem + ``` + + ``` + The error message you got, with the full traceback. + ```` + validations: + required: true + - type: markdown + attributes: + value: > + Thanks for contributing 🎉! 
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e7563efbf..68fcaa9db 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -29,10 +29,11 @@ jobs: - name: Spellcheck run: make spell_check - name: Run tests - run: make tests + run: poetry run pytest tests --ignore=tests/integration_tests - name: Run code coverage run: | - make coverage + poetry run coverage run --source=pandasai -m pytest tests --ignore=tests/integration_tests + poetry run coverage xml - name: Report coverage run: poetry run coverage report - name: Upload coverage reports to Codecov diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 563f79824..e9a048165 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -10,7 +10,7 @@ repos: hooks: - id: pytest-check name: pytest-check - entry: make tests + entry: poetry run pytest tests --ignore=tests/integration_tests language: system pass_filenames: false always_run: true diff --git a/LICENSE b/LICENSE index d3735fd74..46dd3ae6b 100644 --- a/LICENSE +++ b/LICENSE @@ -1,22 +1,25 @@ -Copyright (c) Gabriele Venturi +Copyright (c) 2023 Sinaptik GmbH -Permission is hereby granted, free of charge, to any person -obtaining a copy of this software and associated documentation -files (the "Software"), to deal in the Software without -restriction, including without limitation the rights to use, -copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the -Software is furnished to do so, subject to the following -conditions: +Portions of this software are licensed as follows: -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. +- All content that resides under any "pandasai/ee/" directory of this repository, if such directories exists, are licensed under the license defined in "pandasai/ee/LICENSE". 
+- All third party components incorporated into the PandasAI Software are licensed under the original license provided by the owner of the applicable component. +- Content outside of the above mentioned directories or restrictions above is available under the "MIT Expat" license as defined below. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -OTHER DEALINGS IN THE SOFTWARE. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/LICENSES/ASTOR_LICENSE.txt b/LICENSES/ASTOR_LICENSE.txt deleted file mode 100644 index 891112e41..000000000 --- a/LICENSES/ASTOR_LICENSE.txt +++ /dev/null @@ -1,29 +0,0 @@ -Copyright (c) 2012, Patrick Maupin -Copyright (c) 2013, Berker Peksag -Copyright (c) 2008, Armin Ronacher -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this -list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation and/or -other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its contributors -may be used to endorse or promote products derived from this software without -specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
\ No newline at end of file diff --git a/LICENSES/BEAUTIFUL_SOUP_LICENSE.txt b/LICENSES/BEAUTIFUL_SOUP_LICENSE.txt deleted file mode 100644 index fd6e64db9..000000000 --- a/LICENSES/BEAUTIFUL_SOUP_LICENSE.txt +++ /dev/null @@ -1,26 +0,0 @@ -Beautiful Soup is made available under the MIT license: - - Copyright (c) 2004-2012 Leonard Richardson - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE, DAMMIT. - -Beautiful Soup incorporates code from the html5lib library, which is -also made available under the MIT license. 
\ No newline at end of file diff --git a/LICENSES/BLACK_LICENSE.txt b/LICENSES/BLACK_LICENSE.txt deleted file mode 100644 index e0cb0b16d..000000000 --- a/LICENSES/BLACK_LICENSE.txt +++ /dev/null @@ -1,21 +0,0 @@ -The MIT License (MIT) - -Copyright (c) 2018 Łukasz Langa - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. \ No newline at end of file diff --git a/LICENSES/CLICK_LICENSE.txt b/LICENSES/CLICK_LICENSE.txt deleted file mode 100644 index e63d78d30..000000000 --- a/LICENSES/CLICK_LICENSE.txt +++ /dev/null @@ -1,28 +0,0 @@ -Copyright 2014 Pallets - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - -2. 
Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/LICENSES/COVERAGE_LICENSE.txt b/LICENSES/COVERAGE_LICENSE.txt deleted file mode 100644 index 970a49dd8..000000000 --- a/LICENSES/COVERAGE_LICENSE.txt +++ /dev/null @@ -1,14 +0,0 @@ -Copyright 2001 Gareth Rees. All rights reserved. -Copyright 2004-2023 Ned Batchelder. All rights reserved. - -Except where noted otherwise, this software is licensed under the Apache -License, Version 2.0 (the "License"); you may not use this work except in -compliance with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. \ No newline at end of file diff --git a/LICENSES/DUCKDB_LICENSE.txt b/LICENSES/DUCKDB_LICENSE.txt deleted file mode 100644 index 831d49cc8..000000000 --- a/LICENSES/DUCKDB_LICENSE.txt +++ /dev/null @@ -1,7 +0,0 @@ -Copyright 2018-2023 Stichting DuckDB Foundation - -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. \ No newline at end of file diff --git a/LICENSES/GGPLOT_LICENSE.txt b/LICENSES/GGPLOT_LICENSE.txt deleted file mode 100644 index 7bf1bb274..000000000 --- a/LICENSES/GGPLOT_LICENSE.txt +++ /dev/null @@ -1,23 +0,0 @@ -Copyright (c) 2013, yhat -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - - Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. 
- - Redistributions in binary form must reproduce the above copyright notice, this - list of conditions and the following disclaimer in the documentation and/or - other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/LICENSES/IPYTHON_LICENSE.txt b/LICENSES/IPYTHON_LICENSE.txt deleted file mode 100644 index 72cb32631..000000000 --- a/LICENSES/IPYTHON_LICENSE.txt +++ /dev/null @@ -1,33 +0,0 @@ -BSD 3-Clause License - -- Copyright (c) 2008-Present, IPython Development Team -- Copyright (c) 2001-2007, Fernando Perez -- Copyright (c) 2001, Janko Hauser -- Copyright (c) 2001, Nathaniel Gray - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. 
- -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/LICENSES/KALEIDO_LICENSE.txt b/LICENSES/KALEIDO_LICENSE.txt deleted file mode 100644 index ec6cda68e..000000000 --- a/LICENSES/KALEIDO_LICENSE.txt +++ /dev/null @@ -1,266 +0,0 @@ -All code developed in this repository is released under the MIT license (1). - -The Kaleido executable includes Chromium, which is released under the 3-clause BSD license (2). - -Chromium includes a wide range of third-party dependencies with varying licenses. -See the CREDITS.html file distributed with this package for license details -of these dependencies. 
- -The CREDITS.html is also available at https://github.com/plotly/Kaleido/blob/master/repos/CREDITS.html - -The Kaleido python PyPI package vendors MathJax, which is released under the terms of the Apache License (3) - -(1) ---- -The MIT License (MIT) - -Copyright (c) 2020 Plotly, Inc - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. - -(2) ---- -// Copyright 2015 The Chromium Authors. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. 
nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -(3) ---- - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. 
- - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. 
Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. \ No newline at end of file diff --git a/LICENSES/MARKDOWN_INCLUDE_LICENSE.txt b/LICENSES/MARKDOWN_INCLUDE_LICENSE.txt deleted file mode 100644 index 70566f2d0..000000000 --- a/LICENSES/MARKDOWN_INCLUDE_LICENSE.txt +++ /dev/null @@ -1,674 +0,0 @@ -GNU GENERAL PUBLIC LICENSE - Version 3, 29 June 2007 - - Copyright (C) 2007 Free Software Foundation, Inc. - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The GNU General Public License is a free, copyleft license for -software and other kinds of works. - - The licenses for most software and other practical works are designed -to take away your freedom to share and change the works. By contrast, -the GNU General Public License is intended to guarantee your freedom to -share and change all versions of a program--to make sure it remains free -software for all its users. We, the Free Software Foundation, use the -GNU General Public License for most of our software; it applies also to -any other work released this way by its authors. You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. 
Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -them if you wish), that you receive source code or can get it if you -want it, that you can change the software or use pieces of it in new -free programs, and that you know you can do these things. - - To protect your rights, we need to prevent others from denying you -these rights or asking you to surrender the rights. Therefore, you have -certain responsibilities if you distribute copies of the software, or if -you modify it: responsibilities to respect the freedom of others. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must pass on to the recipients the same -freedoms that you received. You must make sure that they, too, receive -or can get the source code. And you must show them these terms so they -know their rights. - - Developers that use the GNU GPL protect your rights with two steps: -(1) assert copyright on the software, and (2) offer you this License -giving you legal permission to copy, distribute and/or modify it. - - For the developers' and authors' protection, the GPL clearly explains -that there is no warranty for this free software. For both users' and -authors' sake, the GPL requires that modified versions be marked as -changed, so that their problems will not be attributed erroneously to -authors of previous versions. - - Some devices are designed to deny users access to install or run -modified versions of the software inside them, although the manufacturer -can do so. This is fundamentally incompatible with the aim of -protecting users' freedom to change the software. The systematic -pattern of such abuse occurs in the area of products for individuals to -use, which is precisely where it is most unacceptable. Therefore, we -have designed this version of the GPL to prohibit the practice for those -products. 
If such problems arise substantially in other domains, we -stand ready to extend this provision to those domains in future versions -of the GPL, as needed to protect the freedom of users. - - Finally, every program is threatened constantly by software patents. -States should not allow patents to restrict development and use of -software on general-purpose computers, but in those that do, we wish to -avoid the special danger that patents applied to a free program could -make it effectively proprietary. To prevent this, the GPL assures that -patents cannot be used to render the program non-free. - - The precise terms and conditions for copying, distribution and -modification follow. - - TERMS AND CONDITIONS - - 0. Definitions. - - "This License" refers to version 3 of the GNU General Public License. - - "Copyright" also means copyright-like laws that apply to other kinds of -works, such as semiconductor masks. - - "The Program" refers to any copyrightable work licensed under this -License. Each licensee is addressed as "you". "Licensees" and -"recipients" may be individuals or organizations. - - To "modify" a work means to copy from or adapt all or part of the work -in a fashion requiring copyright permission, other than the making of an -exact copy. The resulting work is called a "modified version" of the -earlier work or a work "based on" the earlier work. - - A "covered work" means either the unmodified Program or a work based -on the Program. - - To "propagate" a work means to do anything with it that, without -permission, would make you directly or secondarily liable for -infringement under applicable copyright law, except executing it on a -computer or modifying a private copy. Propagation includes copying, -distribution (with or without modification), making available to the -public, and in some countries other activities as well. - - To "convey" a work means any kind of propagation that enables other -parties to make or receive copies. 
Mere interaction with a user through -a computer network, with no transfer of a copy, is not conveying. - - An interactive user interface displays "Appropriate Legal Notices" -to the extent that it includes a convenient and prominently visible -feature that (1) displays an appropriate copyright notice, and (2) -tells the user that there is no warranty for the work (except to the -extent that warranties are provided), that licensees may convey the -work under this License, and how to view a copy of this License. If -the interface presents a list of user commands or options, such as a -menu, a prominent item in the list meets this criterion. - - 1. Source Code. - - The "source code" for a work means the preferred form of the work -for making modifications to it. "Object code" means any non-source -form of a work. - - A "Standard Interface" means an interface that either is an official -standard defined by a recognized standards body, or, in the case of -interfaces specified for a particular programming language, one that -is widely used among developers working in that language. - - The "System Libraries" of an executable work include anything, other -than the work as a whole, that (a) is included in the normal form of -packaging a Major Component, but which is not part of that Major -Component, and (b) serves only to enable use of the work with that -Major Component, or to implement a Standard Interface for which an -implementation is available to the public in source code form. A -"Major Component", in this context, means a major essential component -(kernel, window system, and so on) of the specific operating system -(if any) on which the executable work runs, or a compiler used to -produce the work, or an object code interpreter used to run it. 
- - The "Corresponding Source" for a work in object code form means all -the source code needed to generate, install, and (for an executable -work) run the object code and to modify the work, including scripts to -control those activities. However, it does not include the work's -System Libraries, or general-purpose tools or generally available free -programs which are used unmodified in performing those activities but -which are not part of the work. For example, Corresponding Source -includes interface definition files associated with source files for -the work, and the source code for shared libraries and dynamically -linked subprograms that the work is specifically designed to require, -such as by intimate data communication or control flow between those -subprograms and other parts of the work. - - The Corresponding Source need not include anything that users -can regenerate automatically from other parts of the Corresponding -Source. - - The Corresponding Source for a work in source code form is that -same work. - - 2. Basic Permissions. - - All rights granted under this License are granted for the term of -copyright on the Program, and are irrevocable provided the stated -conditions are met. This License explicitly affirms your unlimited -permission to run the unmodified Program. The output from running a -covered work is covered by this License only if the output, given its -content, constitutes a covered work. This License acknowledges your -rights of fair use or other equivalent, as provided by copyright law. - - You may make, run and propagate covered works that you do not -convey, without conditions so long as your license otherwise remains -in force. You may convey covered works to others for the sole purpose -of having them make modifications exclusively for you, or provide you -with facilities for running those works, provided that you comply with -the terms of this License in conveying all material for which you do -not control copyright. 
Those thus making or running the covered works -for you must do so exclusively on your behalf, under your direction -and control, on terms that prohibit them from making any copies of -your copyrighted material outside their relationship with you. - - Conveying under any other circumstances is permitted solely under -the conditions stated below. Sublicensing is not allowed; section 10 -makes it unnecessary. - - 3. Protecting Users' Legal Rights From Anti-Circumvention Law. - - No covered work shall be deemed part of an effective technological -measure under any applicable law fulfilling obligations under article -11 of the WIPO copyright treaty adopted on 20 December 1996, or -similar laws prohibiting or restricting circumvention of such -measures. - - When you convey a covered work, you waive any legal power to forbid -circumvention of technological measures to the extent such circumvention -is effected by exercising rights under this License with respect to -the covered work, and you disclaim any intention to limit operation or -modification of the work as a means of enforcing, against the work's -users, your or third parties' legal rights to forbid circumvention of -technological measures. - - 4. Conveying Verbatim Copies. - - You may convey verbatim copies of the Program's source code as you -receive it, in any medium, provided that you conspicuously and -appropriately publish on each copy an appropriate copyright notice; -keep intact all notices stating that this License and any -non-permissive terms added in accord with section 7 apply to the code; -keep intact all notices of the absence of any warranty; and give all -recipients a copy of this License along with the Program. - - You may charge any price or no price for each copy that you convey, -and you may offer support or warranty protection for a fee. - - 5. Conveying Modified Source Versions. 
- - You may convey a work based on the Program, or the modifications to -produce it from the Program, in the form of source code under the -terms of section 4, provided that you also meet all of these conditions: - - a) The work must carry prominent notices stating that you modified - it, and giving a relevant date. - - b) The work must carry prominent notices stating that it is - released under this License and any conditions added under section - 7. This requirement modifies the requirement in section 4 to - "keep intact all notices". - - c) You must license the entire work, as a whole, under this - License to anyone who comes into possession of a copy. This - License will therefore apply, along with any applicable section 7 - additional terms, to the whole of the work, and all its parts, - regardless of how they are packaged. This License gives no - permission to license the work in any other way, but it does not - invalidate such permission if you have separately received it. - - d) If the work has interactive user interfaces, each must display - Appropriate Legal Notices; however, if the Program has interactive - interfaces that do not display Appropriate Legal Notices, your - work need not make them do so. - - A compilation of a covered work with other separate and independent -works, which are not by their nature extensions of the covered work, -and which are not combined with it such as to form a larger program, -in or on a volume of a storage or distribution medium, is called an -"aggregate" if the compilation and its resulting copyright are not -used to limit the access or legal rights of the compilation's users -beyond what the individual works permit. Inclusion of a covered work -in an aggregate does not cause this License to apply to the other -parts of the aggregate. - - 6. Conveying Non-Source Forms. 
- - You may convey a covered work in object code form under the terms -of sections 4 and 5, provided that you also convey the -machine-readable Corresponding Source under the terms of this License, -in one of these ways: - - a) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by the - Corresponding Source fixed on a durable physical medium - customarily used for software interchange. - - b) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by a - written offer, valid for at least three years and valid for as - long as you offer spare parts or customer support for that product - model, to give anyone who possesses the object code either (1) a - copy of the Corresponding Source for all the software in the - product that is covered by this License, on a durable physical - medium customarily used for software interchange, for a price no - more than your reasonable cost of physically performing this - conveying of source, or (2) access to copy the - Corresponding Source from a network server at no charge. - - c) Convey individual copies of the object code with a copy of the - written offer to provide the Corresponding Source. This - alternative is allowed only occasionally and noncommercially, and - only if you received the object code with such an offer, in accord - with subsection 6b. - - d) Convey the object code by offering access from a designated - place (gratis or for a charge), and offer equivalent access to the - Corresponding Source in the same way through the same place at no - further charge. You need not require recipients to copy the - Corresponding Source along with the object code. 
If the place to - copy the object code is a network server, the Corresponding Source - may be on a different server (operated by you or a third party) - that supports equivalent copying facilities, provided you maintain - clear directions next to the object code saying where to find the - Corresponding Source. Regardless of what server hosts the - Corresponding Source, you remain obligated to ensure that it is - available for as long as needed to satisfy these requirements. - - e) Convey the object code using peer-to-peer transmission, provided - you inform other peers where the object code and Corresponding - Source of the work are being offered to the general public at no - charge under subsection 6d. - - A separable portion of the object code, whose source code is excluded -from the Corresponding Source as a System Library, need not be -included in conveying the object code work. - - A "User Product" is either (1) a "consumer product", which means any -tangible personal property which is normally used for personal, family, -or household purposes, or (2) anything designed or sold for incorporation -into a dwelling. In determining whether a product is a consumer product, -doubtful cases shall be resolved in favor of coverage. For a particular -product received by a particular user, "normally used" refers to a -typical or common use of that class of product, regardless of the status -of the particular user or of the way in which the particular user -actually uses, or expects or is expected to use, the product. A product -is a consumer product regardless of whether the product has substantial -commercial, industrial or non-consumer uses, unless such uses represent -the only significant mode of use of the product. 
- - "Installation Information" for a User Product means any methods, -procedures, authorization keys, or other information required to install -and execute modified versions of a covered work in that User Product from -a modified version of its Corresponding Source. The information must -suffice to ensure that the continued functioning of the modified object -code is in no case prevented or interfered with solely because -modification has been made. - - If you convey an object code work under this section in, or with, or -specifically for use in, a User Product, and the conveying occurs as -part of a transaction in which the right of possession and use of the -User Product is transferred to the recipient in perpetuity or for a -fixed term (regardless of how the transaction is characterized), the -Corresponding Source conveyed under this section must be accompanied -by the Installation Information. But this requirement does not apply -if neither you nor any third party retains the ability to install -modified object code on the User Product (for example, the work has -been installed in ROM). - - The requirement to provide Installation Information does not include a -requirement to continue to provide support service, warranty, or updates -for a work that has been modified or installed by the recipient, or for -the User Product in which it has been modified or installed. Access to a -network may be denied when the modification itself materially and -adversely affects the operation of the network or violates the rules and -protocols for communication across the network. - - Corresponding Source conveyed, and Installation Information provided, -in accord with this section must be in a format that is publicly -documented (and with an implementation available to the public in -source code form), and must require no special password or key for -unpacking, reading or copying. - - 7. Additional Terms. 
- - "Additional permissions" are terms that supplement the terms of this -License by making exceptions from one or more of its conditions. -Additional permissions that are applicable to the entire Program shall -be treated as though they were included in this License, to the extent -that they are valid under applicable law. If additional permissions -apply only to part of the Program, that part may be used separately -under those permissions, but the entire Program remains governed by -this License without regard to the additional permissions. - - When you convey a copy of a covered work, you may at your option -remove any additional permissions from that copy, or from any part of -it. (Additional permissions may be written to require their own -removal in certain cases when you modify the work.) You may place -additional permissions on material, added by you to a covered work, -for which you have or can give appropriate copyright permission. - - Notwithstanding any other provision of this License, for material you -add to a covered work, you may (if authorized by the copyright holders of -that material) supplement the terms of this License with terms: - - a) Disclaiming warranty or limiting liability differently from the - terms of sections 15 and 16 of this License; or - - b) Requiring preservation of specified reasonable legal notices or - author attributions in that material or in the Appropriate Legal - Notices displayed by works containing it; or - - c) Prohibiting misrepresentation of the origin of that material, or - requiring that modified versions of such material be marked in - reasonable ways as different from the original version; or - - d) Limiting the use for publicity purposes of names of licensors or - authors of the material; or - - e) Declining to grant rights under trademark law for use of some - trade names, trademarks, or service marks; or - - f) Requiring indemnification of licensors and authors of that - material by anyone who conveys the 
material (or modified versions of - it) with contractual assumptions of liability to the recipient, for - any liability that these contractual assumptions directly impose on - those licensors and authors. - - All other non-permissive additional terms are considered "further -restrictions" within the meaning of section 10. If the Program as you -received it, or any part of it, contains a notice stating that it is -governed by this License along with a term that is a further -restriction, you may remove that term. If a license document contains -a further restriction but permits relicensing or conveying under this -License, you may add to a covered work material governed by the terms -of that license document, provided that the further restriction does -not survive such relicensing or conveying. - - If you add terms to a covered work in accord with this section, you -must place, in the relevant source files, a statement of the -additional terms that apply to those files, or a notice indicating -where to find the applicable terms. - - Additional terms, permissive or non-permissive, may be stated in the -form of a separately written license, or stated as exceptions; -the above requirements apply either way. - - 8. Termination. - - You may not propagate or modify a covered work except as expressly -provided under this License. Any attempt otherwise to propagate or -modify it is void, and will automatically terminate your rights under -this License (including any patent licenses granted under the third -paragraph of section 11). - - However, if you cease all violation of this License, then your -license from a particular copyright holder is reinstated (a) -provisionally, unless and until the copyright holder explicitly and -finally terminates your license, and (b) permanently, if the copyright -holder fails to notify you of the violation by some reasonable means -prior to 60 days after the cessation. 
- - Moreover, your license from a particular copyright holder is -reinstated permanently if the copyright holder notifies you of the -violation by some reasonable means, this is the first time you have -received notice of violation of this License (for any work) from that -copyright holder, and you cure the violation prior to 30 days after -your receipt of the notice. - - Termination of your rights under this section does not terminate the -licenses of parties who have received copies or rights from you under -this License. If your rights have been terminated and not permanently -reinstated, you do not qualify to receive new licenses for the same -material under section 10. - - 9. Acceptance Not Required for Having Copies. - - You are not required to accept this License in order to receive or -run a copy of the Program. Ancillary propagation of a covered work -occurring solely as a consequence of using peer-to-peer transmission -to receive a copy likewise does not require acceptance. However, -nothing other than this License grants you permission to propagate or -modify any covered work. These actions infringe copyright if you do -not accept this License. Therefore, by modifying or propagating a -covered work, you indicate your acceptance of this License to do so. - - 10. Automatic Licensing of Downstream Recipients. - - Each time you convey a covered work, the recipient automatically -receives a license from the original licensors, to run, modify and -propagate that work, subject to this License. You are not responsible -for enforcing compliance by third parties with this License. - - An "entity transaction" is a transaction transferring control of an -organization, or substantially all assets of one, or subdividing an -organization, or merging organizations. 
If propagation of a covered -work results from an entity transaction, each party to that -transaction who receives a copy of the work also receives whatever -licenses to the work the party's predecessor in interest had or could -give under the previous paragraph, plus a right to possession of the -Corresponding Source of the work from the predecessor in interest, if -the predecessor has it or can get it with reasonable efforts. - - You may not impose any further restrictions on the exercise of the -rights granted or affirmed under this License. For example, you may -not impose a license fee, royalty, or other charge for exercise of -rights granted under this License, and you may not initiate litigation -(including a cross-claim or counterclaim in a lawsuit) alleging that -any patent claim is infringed by making, using, selling, offering for -sale, or importing the Program or any portion of it. - - 11. Patents. - - A "contributor" is a copyright holder who authorizes use under this -License of the Program or a work on which the Program is based. The -work thus licensed is called the contributor's "contributor version". - - A contributor's "essential patent claims" are all patent claims -owned or controlled by the contributor, whether already acquired or -hereafter acquired, that would be infringed by some manner, permitted -by this License, of making, using, or selling its contributor version, -but do not include claims that would be infringed only as a -consequence of further modification of the contributor version. For -purposes of this definition, "control" includes the right to grant -patent sublicenses in a manner consistent with the requirements of -this License. - - Each contributor grants you a non-exclusive, worldwide, royalty-free -patent license under the contributor's essential patent claims, to -make, use, sell, offer for sale, import and otherwise run, modify and -propagate the contents of its contributor version. 
- - In the following three paragraphs, a "patent license" is any express -agreement or commitment, however denominated, not to enforce a patent -(such as an express permission to practice a patent or covenant not to -sue for patent infringement). To "grant" such a patent license to a -party means to make such an agreement or commitment not to enforce a -patent against the party. - - If you convey a covered work, knowingly relying on a patent license, -and the Corresponding Source of the work is not available for anyone -to copy, free of charge and under the terms of this License, through a -publicly available network server or other readily accessible means, -then you must either (1) cause the Corresponding Source to be so -available, or (2) arrange to deprive yourself of the benefit of the -patent license for this particular work, or (3) arrange, in a manner -consistent with the requirements of this License, to extend the patent -license to downstream recipients. "Knowingly relying" means you have -actual knowledge that, but for the patent license, your conveying the -covered work in a country, or your recipient's use of the covered work -in a country, would infringe one or more identifiable patents in that -country that you have reason to believe are valid. - - If, pursuant to or in connection with a single transaction or -arrangement, you convey, or propagate by procuring conveyance of, a -covered work, and grant a patent license to some of the parties -receiving the covered work authorizing them to use, propagate, modify -or convey a specific copy of the covered work, then the patent license -you grant is automatically extended to all recipients of the covered -work and works based on it. - - A patent license is "discriminatory" if it does not include within -the scope of its coverage, prohibits the exercise of, or is -conditioned on the non-exercise of one or more of the rights that are -specifically granted under this License. 
You may not convey a covered -work if you are a party to an arrangement with a third party that is -in the business of distributing software, under which you make payment -to the third party based on the extent of your activity of conveying -the work, and under which the third party grants, to any of the -parties who would receive the covered work from you, a discriminatory -patent license (a) in connection with copies of the covered work -conveyed by you (or copies made from those copies), or (b) primarily -for and in connection with specific products or compilations that -contain the covered work, unless you entered into that arrangement, -or that patent license was granted, prior to 28 March 2007. - - Nothing in this License shall be construed as excluding or limiting -any implied license or other defenses to infringement that may -otherwise be available to you under applicable patent law. - - 12. No Surrender of Others' Freedom. - - If conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot convey a -covered work so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you may -not convey it at all. For example, if you agree to terms that obligate you -to collect a royalty for further conveying from those to whom you convey -the Program, the only way you could satisfy both those terms and this -License would be to refrain entirely from conveying the Program. - - 13. Use with the GNU Affero General Public License. - - Notwithstanding any other provision of this License, you have -permission to link or combine any covered work with a work licensed -under version 3 of the GNU Affero General Public License into a single -combined work, and to convey the resulting work. 
The terms of this -License will continue to apply to the part which is the covered work, -but the special requirements of the GNU Affero General Public License, -section 13, concerning interaction through a network will apply to the -combination as such. - - 14. Revised Versions of this License. - - The Free Software Foundation may publish revised and/or new versions of -the GNU General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - - Each version is given a distinguishing version number. If the -Program specifies that a certain numbered version of the GNU General -Public License "or any later version" applies to it, you have the -option of following the terms and conditions either of that numbered -version or of any later version published by the Free Software -Foundation. If the Program does not specify a version number of the -GNU General Public License, you may choose any version ever published -by the Free Software Foundation. - - If the Program specifies that a proxy can decide which future -versions of the GNU General Public License can be used, that proxy's -public statement of acceptance of a version permanently authorizes you -to choose that version for the Program. - - Later license versions may give you additional or different -permissions. However, no additional obligations are imposed on any -author or copyright holder as a result of your choosing to follow a -later version. - - 15. Disclaimer of Warranty. - - THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY -APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT -HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY -OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, -THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM -IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF -ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - - 16. Limitation of Liability. - - IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS -THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY -GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE -USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF -DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD -PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), -EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF -SUCH DAMAGES. - - 17. Interpretation of Sections 15 and 16. - - If the disclaimer of warranty and limitation of liability provided -above cannot be given local legal effect according to their terms, -reviewing courts shall apply local law that most closely approximates -an absolute waiver of all civil liability in connection with the -Program, unless a warranty or assumption of liability accompanies a -copy of the Program in return for a fee. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -state the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. 
- - {one line to give the program's name and a brief idea of what it does.} - Copyright (C) {year} {name of author} - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . - -Also add information on how to contact you by electronic and paper mail. - - If the program does terminal interaction, make it output a short -notice like this when it starts in an interactive mode: - - {project} Copyright (C) {year} {fullname} - This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, your program's commands -might be different; for a GUI interface, you would use an "about box". - - You should also get your employer (if you work as a programmer) or school, -if any, to sign a "copyright disclaimer" for the program, if necessary. -For more information on this, and how to apply and follow the GNU GPL, see -. - - The GNU General Public License does not permit incorporating your program -into proprietary programs. If your program is a subroutine library, you -may consider it more useful to permit linking proprietary applications with -the library. If this is what you want to do, use the GNU Lesser General -Public License instead of this License. But first, please read -. 
\ No newline at end of file diff --git a/LICENSES/MATPLOTLIB_LICENSE.txt b/LICENSES/MATPLOTLIB_LICENSE.txt deleted file mode 100644 index ec51537db..000000000 --- a/LICENSES/MATPLOTLIB_LICENSE.txt +++ /dev/null @@ -1,99 +0,0 @@ -License agreement for matplotlib versions 1.3.0 and later -========================================================= - -1. This LICENSE AGREEMENT is between the Matplotlib Development Team -("MDT"), and the Individual or Organization ("Licensee") accessing and -otherwise using matplotlib software in source or binary form and its -associated documentation. - -2. Subject to the terms and conditions of this License Agreement, MDT -hereby grants Licensee a nonexclusive, royalty-free, world-wide license -to reproduce, analyze, test, perform and/or display publicly, prepare -derivative works, distribute, and otherwise use matplotlib -alone or in any derivative version, provided, however, that MDT's -License Agreement and MDT's notice of copyright, i.e., "Copyright (c) -2012- Matplotlib Development Team; All Rights Reserved" are retained in -matplotlib alone or in any derivative version prepared by -Licensee. - -3. In the event Licensee prepares a derivative work that is based on or -incorporates matplotlib or any part thereof, and wants to -make the derivative work available to others as provided herein, then -Licensee hereby agrees to include in any such work a brief summary of -the changes made to matplotlib . - -4. MDT is making matplotlib available to Licensee on an "AS -IS" basis. MDT MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR -IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, MDT MAKES NO AND -DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS -FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF MATPLOTLIB -WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. - -5. 
MDT SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF MATPLOTLIB - FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR -LOSS AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING -MATPLOTLIB , OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF -THE POSSIBILITY THEREOF. - -6. This License Agreement will automatically terminate upon a material -breach of its terms and conditions. - -7. Nothing in this License Agreement shall be deemed to create any -relationship of agency, partnership, or joint venture between MDT and -Licensee. This License Agreement does not grant permission to use MDT -trademarks or trade name in a trademark sense to endorse or promote -products or services of Licensee, or any third party. - -8. By copying, installing or otherwise using matplotlib , -Licensee agrees to be bound by the terms and conditions of this License -Agreement. - -License agreement for matplotlib versions prior to 1.3.0 -======================================================== - -1. This LICENSE AGREEMENT is between John D. Hunter ("JDH"), and the -Individual or Organization ("Licensee") accessing and otherwise using -matplotlib software in source or binary form and its associated -documentation. - -2. Subject to the terms and conditions of this License Agreement, JDH -hereby grants Licensee a nonexclusive, royalty-free, world-wide license -to reproduce, analyze, test, perform and/or display publicly, prepare -derivative works, distribute, and otherwise use matplotlib -alone or in any derivative version, provided, however, that JDH's -License Agreement and JDH's notice of copyright, i.e., "Copyright (c) -2002-2011 John D. Hunter; All Rights Reserved" are retained in -matplotlib alone or in any derivative version prepared by -Licensee. - -3. 
In the event Licensee prepares a derivative work that is based on or -incorporates matplotlib or any part thereof, and wants to -make the derivative work available to others as provided herein, then -Licensee hereby agrees to include in any such work a brief summary of -the changes made to matplotlib. - -4. JDH is making matplotlib available to Licensee on an "AS -IS" basis. JDH MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR -IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, JDH MAKES NO AND -DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS -FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF MATPLOTLIB -WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. - -5. JDH SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF MATPLOTLIB - FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR -LOSS AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING -MATPLOTLIB , OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF -THE POSSIBILITY THEREOF. - -6. This License Agreement will automatically terminate upon a material -breach of its terms and conditions. - -7. Nothing in this License Agreement shall be deemed to create any -relationship of agency, partnership, or joint venture between JDH and -Licensee. This License Agreement does not grant permission to use JDH -trademarks or trade name in a trademark sense to endorse or promote -products or services of Licensee, or any third party. - -8. By copying, installing or otherwise using matplotlib, -Licensee agrees to be bound by the terms and conditions of this License -Agreement. 
\ No newline at end of file diff --git a/LICENSES/MKDOCSTRINGS_PYTHON_LICENSE.txt b/LICENSES/MKDOCSTRINGS_PYTHON_LICENSE.txt deleted file mode 100644 index e8538f901..000000000 --- a/LICENSES/MKDOCSTRINGS_PYTHON_LICENSE.txt +++ /dev/null @@ -1,15 +0,0 @@ -ISC License - -Copyright (c) 2021, Timothée Mazzucotelli - -Permission to use, copy, modify, and/or distribute this software for any -purpose with or without fee is hereby granted, provided that the above -copyright notice and this permission notice appear in all copies. - -THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES -WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR -ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN -ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF -OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. \ No newline at end of file diff --git a/LICENSES/MKDOCS_LICENSE.txt b/LICENSES/MKDOCS_LICENSE.txt deleted file mode 100644 index fdbc87c42..000000000 --- a/LICENSES/MKDOCS_LICENSE.txt +++ /dev/null @@ -1,26 +0,0 @@ -Copyright © 2014-present, Tom Christie. All rights reserved. - -Redistribution and use in source and binary forms, with or -without modification, are permitted provided that the following -conditions are met: - -Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. -Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in -the documentation and/or other materials provided with the -distribution. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND -CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, -INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR -CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF -USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN -ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/LICENSES/NUMPY_LICENSE.txt b/LICENSES/NUMPY_LICENSE.txt deleted file mode 100644 index f2d647bf0..000000000 --- a/LICENSES/NUMPY_LICENSE.txt +++ /dev/null @@ -1,30 +0,0 @@ -Copyright (c) 2005-2023, NumPy Developers. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials provided - with the distribution. - - * Neither the name of the NumPy Developers nor the names of any - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/LICENSES/OPENPYXL_LICENSE.txt b/LICENSES/OPENPYXL_LICENSE.txt deleted file mode 100644 index 96dae89ed..000000000 --- a/LICENSES/OPENPYXL_LICENSE.txt +++ /dev/null @@ -1,18 +0,0 @@ -This software is under the MIT Licence -Copyright (c) 2010 openpyxl -Permission is hereby granted, free of charge, to any person obtaining a -copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: -The above copyright notice and this permission notice shall be included -in all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
\ No newline at end of file diff --git a/LICENSES/PANDAS_LICENSE.txt b/LICENSES/PANDAS_LICENSE.txt deleted file mode 100644 index b11ba835b..000000000 --- a/LICENSES/PANDAS_LICENSE.txt +++ /dev/null @@ -1,31 +0,0 @@ -BSD 3-Clause License - -Copyright (c) 2008-2011, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team -All rights reserved. - -Copyright (c) 2011-2023, Open source contributors. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
\ No newline at end of file diff --git a/LICENSES/PLOTLY_LICENSE.txt b/LICENSES/PLOTLY_LICENSE.txt deleted file mode 100644 index 7c4c5ebbe..000000000 --- a/LICENSES/PLOTLY_LICENSE.txt +++ /dev/null @@ -1,21 +0,0 @@ -The MIT License (MIT) - -Copyright (c) 2016-2018 Plotly, Inc - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
\ No newline at end of file diff --git a/LICENSES/POLARS_LICENSE.txt b/LICENSES/POLARS_LICENSE.txt deleted file mode 100644 index 9dbfcb834..000000000 --- a/LICENSES/POLARS_LICENSE.txt +++ /dev/null @@ -1,19 +0,0 @@ -Copyright (c) 2020 Ritchie Vink - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
\ No newline at end of file diff --git a/LICENSES/PRE_COMMIT_LICENSE.txt b/LICENSES/PRE_COMMIT_LICENSE.txt deleted file mode 100644 index 69fb05fea..000000000 --- a/LICENSES/PRE_COMMIT_LICENSE.txt +++ /dev/null @@ -1,19 +0,0 @@ -Copyright (c) 2014 pre-commit dev team: Anthony Sottile, Ken Struys - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. \ No newline at end of file diff --git a/LICENSES/PSYCOPG2_LICENSE.txt b/LICENSES/PSYCOPG2_LICENSE.txt deleted file mode 100644 index 50d31d771..000000000 --- a/LICENSES/PSYCOPG2_LICENSE.txt +++ /dev/null @@ -1,49 +0,0 @@ -psycopg2 and the LGPL ---------------------- - -psycopg2 is free software: you can redistribute it and/or modify it -under the terms of the GNU Lesser General Public License as published -by the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. 
- -psycopg2 is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -License for more details. - -In addition, as a special exception, the copyright holders give -permission to link this program with the OpenSSL library (or with -modified versions of OpenSSL that use the same license as OpenSSL), -and distribute linked combinations including the two. - -You must obey the GNU Lesser General Public License in all respects for -all of the code used other than OpenSSL. If you modify file(s) with this -exception, you may extend this exception to your version of the file(s), -but you are not obligated to do so. If you do not wish to do so, delete -this exception statement from your version. If you delete this exception -statement from all source files in the program, then also delete it here. - -You should have received a copy of the GNU Lesser General Public License -along with psycopg2 (see the doc/ directory.) -If not, see . - - -Alternative licenses --------------------- - -The following BSD-like license applies (at your option) to the files following -the pattern ``psycopg/adapter*.{h,c}`` and ``psycopg/microprotocol*.{h,c}``: - - Permission is granted to anyone to use this software for any purpose, - including commercial applications, and to alter it and redistribute it - freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must not - claim that you wrote the original software. If you use this - software in a product, an acknowledgment in the product documentation - would be appreciated but is not required. - - 2. Altered source versions must be plainly marked as such, and must not - be misrepresented as being the original software. - - 3. This notice may not be removed or altered from any source distribution. 
\ No newline at end of file diff --git a/LICENSES/PYDANTIC_LICENSE.txt b/LICENSES/PYDANTIC_LICENSE.txt deleted file mode 100644 index f793af099..000000000 --- a/LICENSES/PYDANTIC_LICENSE.txt +++ /dev/null @@ -1,21 +0,0 @@ -The MIT License (MIT) - -Copyright (c) 2017 to present Pydantic Services Inc. and individual contributors. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
\ No newline at end of file diff --git a/LICENSES/PYMYSQL_LICENSE.txt b/LICENSES/PYMYSQL_LICENSE.txt deleted file mode 100644 index 4d76c99d5..000000000 --- a/LICENSES/PYMYSQL_LICENSE.txt +++ /dev/null @@ -1,19 +0,0 @@ -Copyright (c) 2010, 2013 PyMySQL contributors - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
\ No newline at end of file diff --git a/LICENSES/PYTEST_ENV_LICENSE.txt b/LICENSES/PYTEST_ENV_LICENSE.txt deleted file mode 100644 index 1b4e5fa9f..000000000 --- a/LICENSES/PYTEST_ENV_LICENSE.txt +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) 2010-202x The pytest-env developers - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
\ No newline at end of file diff --git a/LICENSES/PYTEST_LICENSE.txt b/LICENSES/PYTEST_LICENSE.txt deleted file mode 100644 index 8c9d2884a..000000000 --- a/LICENSES/PYTEST_LICENSE.txt +++ /dev/null @@ -1,21 +0,0 @@ -The MIT License (MIT) - -Copyright (c) 2004 Holger Krekel and others - -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -of the Software, and to permit persons to whom the Software is furnished to do -so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
\ No newline at end of file diff --git a/LICENSES/PYTEST_MOCK_LICENSE.txt b/LICENSES/PYTEST_MOCK_LICENSE.txt deleted file mode 100644 index e0550cdcc..000000000 --- a/LICENSES/PYTEST_MOCK_LICENSE.txt +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) [2016] [Bruno Oliveira] - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. \ No newline at end of file diff --git a/LICENSES/PYTHON_DOTENV_LICENSE.txt b/LICENSES/PYTHON_DOTENV_LICENSE.txt deleted file mode 100644 index 24c53e688..000000000 --- a/LICENSES/PYTHON_DOTENV_LICENSE.txt +++ /dev/null @@ -1,27 +0,0 @@ -Copyright (c) 2014, Saurabh Kumar (python-dotenv), 2013, Ted Tieken (django-dotenv-rw), 2013, Jacob Kaplan-Moss (django-dotenv) - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -- Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. 
- -- Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -- Neither the name of django-dotenv nor the names of its contributors - may be used to endorse or promote products derived from this software - without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
\ No newline at end of file diff --git a/LICENSES/RUFF_LICENSE.txt b/LICENSES/RUFF_LICENSE.txt deleted file mode 100644 index 0b0bd33bb..000000000 --- a/LICENSES/RUFF_LICENSE.txt +++ /dev/null @@ -1,1348 +0,0 @@ -MIT License - -Copyright (c) 2022 Charles Marsh - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
- -end of terms and conditions - -The externally maintained libraries from which parts of the Software is derived -are: - -- flake8-comprehensions, licensed as follows: - """ - MIT License - - Copyright (c) 2017 Adam Johnson - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. - """ - -- flake8-no-pep420, licensed as follows: - """ - MIT License - - Copyright (c) 2020 Adam Johnson - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. 
- - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. - """ - -- flake8-tidy-imports, licensed as follows: - """ - MIT License - - Copyright (c) 2017 Adam Johnson - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. 
- """ - -- flake8-return, licensed as follows: - """ - MIT License - - Copyright (c) 2019 Afonasev Evgeniy - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. - """ - -- flake8-2020, licensed as follows: - """ - Copyright (c) 2019 Anthony Sottile - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. 
- - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. - """ - -- pyupgrade, licensed as follows: - """ - Copyright (c) 2017 Anthony Sottile - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. 
- """ - -- flake8-blind-except, licensed as follows: - """ - The MIT License (MIT) - - Copyright (c) 2014 Elijah Andrews - - Permission is hereby granted, free of charge, to any person obtaining a copy of - this software and associated documentation files (the "Software"), to deal in - the Software without restriction, including without limitation the rights to - use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of - the Software, and to permit persons to whom the Software is furnished to do so, - subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS - FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR - COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER - IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - """ - -- flake8-gettext, licensed as follows: - """ - BSD Zero Clause License - - Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted. - - THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
- """ - -- flake8-implicit-str-concat, licensed as follows: - """ - The MIT License (MIT) - - Copyright (c) 2019 Dylan Turner - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. - """ - -- flake8-debugger, licensed as follows: - """ - MIT License - - Copyright (c) 2016 Joseph Kahn - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. 
- - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. - """ - -- flake8-pyi, licensed as follows: - """ - The MIT License (MIT) - - Copyright (c) 2016 Łukasz Langa - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. 
- """ - -- flake8-print, licensed as follows: - """ - MIT License - - Copyright (c) 2016 Joseph Kahn - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. - """ - -- flake8-import-conventions, licensed as follows: - """ - MIT License - - Copyright (c) 2021 João Palmeiro - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. 
- - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. - """ - -- flake8-simplify, licensed as follows: - """ - MIT License - - Copyright (c) 2020 Martin Thoma - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. 
- """ - -- flake8-slots, licensed as follows: - """ - Copyright (c) 2021 Dominic Davis-Foster - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, - DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE - OR OTHER DEALINGS IN THE SOFTWARE. - """ - -- flake8-todos, licensed as follows: - """ - Copyright (c) 2019 EclecticIQ. All rights reserved. - Copyright (c) 2020 Gram . All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - 3. 
Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from this - software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE - FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - """ - -- flake8-unused-arguments, licensed as follows: - """ - MIT License - - Copyright (c) 2019 Nathan Hoad - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. - """ - -- pygrep-hooks, licensed as follows: - """ - Copyright (c) 2018 Anthony Sottile - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. 
- """ - -- autoflake, licensed as follows: - """ - Copyright (C) 2012-2018 Steven Myint - - Permission is hereby granted, free of charge, to any person obtaining a copy of - this software and associated documentation files (the "Software"), to deal in - the Software without restriction, including without limitation the rights to - use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies - of the Software, and to permit persons to whom the Software is furnished to do - so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. - """ - -- autotyping, licensed as follows: - """ - MIT License - - Copyright (c) 2023 Jelle Zijlstra - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. 
- - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. - """ - -- Flake8, licensed as follows: - """ - == Flake8 License (MIT) == - - Copyright (C) 2011-2013 Tarek Ziade - Copyright (C) 2012-2016 Ian Cordasco - - Permission is hereby granted, free of charge, to any person obtaining a copy of - this software and associated documentation files (the "Software"), to deal in - the Software without restriction, including without limitation the rights to - use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies - of the Software, and to permit persons to whom the Software is furnished to do - so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. 
- """ - -- flake8-bugbear, licensed as follows: - """ - The MIT License (MIT) - - Copyright (c) 2016 Łukasz Langa - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. - """ - -- flake8-commas, licensed as follows: - """ - The MIT License (MIT) - - Copyright (c) 2017 Thomas Grainger. - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. 
- - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. - - - Portions of this flake8-commas Software may utilize the following - copyrighted material, the use of which is hereby acknowledged. - - Original flake8-commas: https://github.com/trevorcreech/flake8-commas/commit/e8563b71b1d5442e102c8734c11cb5202284293d - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. 
- """ - -- flynt, licensed as follows: - """ - MIT License - - Copyright (c) 2019-2022 Ilya Kamenshchikov - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. - """ - -- isort, licensed as follows: - """ - The MIT License (MIT) - - Copyright (c) 2013 Timothy Edmund Crosley - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. 
- - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. - """ - -- pep8-naming, licensed as follows: - """ - Copyright © 2013 Florent Xicluna - - Licensed under the terms of the Expat License - - Permission is hereby granted, free of charge, to any person - obtaining a copy of this software and associated documentation files - (the "Software"), to deal in the Software without restriction, - including without limitation the rights to use, copy, modify, merge, - publish, distribute, sublicense, and/or sell copies of the Software, - and to permit persons to whom the Software is furnished to do so, - subject to the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. - """ - -- pycodestyle, licensed as follows: - """ - Copyright © 2006-2009 Johann C. 
Rocholl - Copyright © 2009-2014 Florent Xicluna - Copyright © 2014-2020 Ian Lee - - Licensed under the terms of the Expat License - - Permission is hereby granted, free of charge, to any person - obtaining a copy of this software and associated documentation files - (the "Software"), to deal in the Software without restriction, - including without limitation the rights to use, copy, modify, merge, - publish, distribute, sublicense, and/or sell copies of the Software, - and to permit persons to whom the Software is furnished to do so, - subject to the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. - """ - -- pydocstyle, licensed as follows: - """ - Copyright (c) 2012 GreenSteam, - - Copyright (c) 2014-2020 Amir Rachum, - - Copyright (c) 2020 Sambhav Kothari, - - Permission is hereby granted, free of charge, to any person obtaining a copy of - this software and associated documentation files (the "Software"), to deal in - the Software without restriction, including without limitation the rights to - use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies - of the Software, and to permit persons to whom the Software is furnished to do - so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. 
- - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. - """ - -- Pyflakes, licensed as follows: - """ - Copyright 2005-2011 Divmod, Inc. - Copyright 2013-2014 Florent Xicluna - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- """ - -- flake8-use-pathlib, licensed as follows: - """ - MIT License - - Copyright (c) 2021 Rodolphe Pelloux-Prayer - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. - """ - -- RustPython, licensed as follows: - """ - MIT License - - Copyright (c) 2020 RustPython Team - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. 
- - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. - """ - -- flake8-annotations, licensed as follows: - """ - MIT License - - Copyright (c) 2019 - Present S. Co1 - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. 
- """ - -- flake8-async, licensed as follows: - """ - MIT License - - Copyright (c) 2022 Cooper Lees - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. - """ - -- flake8-type-checking, licensed as follows: - """ - Copyright (c) 2021, Sondre Lillebø Gundersen - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - - * Neither the name of pytest-{{ cookiecutter.plugin_name }} nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE - FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - """ - -- flake8-bandit, licensed as follows: - """ - Copyright (c) 2017 Tyler Wince - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. 
- """ - -- flake8-eradicate, licensed as follows: - """ - MIT License - - Copyright (c) 2018 Nikita Sobolev - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. - """ - -- flake8-quotes, licensed as follows: - """ - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. 
- - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. - """ - -- flake8-logging-format, licensed as follows: - """ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. 
- - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. 
- - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "{}" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright {yyyy} {name of copyright owner} - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - """ - -- flake8-raise, licensed as follows: - """ - MIT License - - Copyright (c) 2020 Jon Dufresne - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. 
- """ - -- flake8-self, licensed as follows: - """ - MIT License - - Copyright (c) 2023 Korijn van Golen - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. - """ - -- flake8-django, licensed under the GPL license. - -- perflint, licensed as follows: - """ - MIT License - - Copyright (c) 2022 Anthony Shaw - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. 
- - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. - """ - -- flake8-logging, licensed as follows: - """ - MIT License - - Copyright (c) 2023 Adam Johnson - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. - """ - -- Pyright, licensed as follows: - """ - MIT License - - Pyright - A static type checker for the Python language - Copyright (c) Microsoft Corporation. All rights reserved. 
- - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE - """ - -- rust-analyzer/text-size, licensed under the MIT license: - """ - Permission is hereby granted, free of charge, to any - person obtaining a copy of this software and associated - documentation files (the "Software"), to deal in the - Software without restriction, including without - limitation the rights to use, copy, modify, merge, - publish, distribute, sublicense, and/or sell copies of - the Software, and to permit persons to whom the Software - is furnished to do so, subject to the following - conditions: - - The above copyright notice and this permission notice - shall be included in all copies or substantial portions - of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF - ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED - TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A - PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT - SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR - IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - DEALINGS IN THE SOFTWARE. - """ - -- rome/tools, licensed under the MIT license: - """ - MIT License - - Copyright (c) Rome Tools, Inc. and its affiliates. - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. - """ \ No newline at end of file diff --git a/LICENSES/SCIKIT_LEARN_LICENSE.txt b/LICENSES/SCIKIT_LEARN_LICENSE.txt deleted file mode 100644 index cab858455..000000000 --- a/LICENSES/SCIKIT_LEARN_LICENSE.txt +++ /dev/null @@ -1,29 +0,0 @@ -BSD 3-Clause License - -Copyright (c) 2007-2023 The scikit-learn developers. -All rights reserved. 
- -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/LICENSES/SCIPY_LICENSE.txt b/LICENSES/SCIPY_LICENSE.txt deleted file mode 100644 index 640dc9ab5..000000000 --- a/LICENSES/SCIPY_LICENSE.txt +++ /dev/null @@ -1,30 +0,0 @@ -Copyright (c) 2001-2002 Enthought, Inc. 2003-2023, SciPy Developers. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. 
Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials provided - with the distribution. - -3. Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/LICENSES/SEABORN_LICENSE.txt b/LICENSES/SEABORN_LICENSE.txt deleted file mode 100644 index 8582e9b92..000000000 --- a/LICENSES/SEABORN_LICENSE.txt +++ /dev/null @@ -1,27 +0,0 @@ -Copyright (c) 2012-2021, Michael L. Waskom -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. 
- -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* Neither the name of the project nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/LICENSES/SNOWFLAKE_SQLALCHEMY_LICENSE.txt b/LICENSES/SNOWFLAKE_SQLALCHEMY_LICENSE.txt deleted file mode 100644 index c1a74fd11..000000000 --- a/LICENSES/SNOWFLAKE_SQLALCHEMY_LICENSE.txt +++ /dev/null @@ -1,202 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. 
- - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of 
the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright (c) 2012-2023 Snowflake Computing, Inc. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
\ No newline at end of file diff --git a/LICENSES/SOURCERY_LICENSE.txt b/LICENSES/SOURCERY_LICENSE.txt deleted file mode 100644 index 352b398e4..000000000 --- a/LICENSES/SOURCERY_LICENSE.txt +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) 2020 SOURCERY.AI LIMITED - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. \ No newline at end of file diff --git a/LICENSES/SQLALCHEMY_LICENSE.txt b/LICENSES/SQLALCHEMY_LICENSE.txt deleted file mode 100644 index 69ef8f6e7..000000000 --- a/LICENSES/SQLALCHEMY_LICENSE.txt +++ /dev/null @@ -1,19 +0,0 @@ -Copyright 2005-2023 SQLAlchemy authors and contributors . 
- -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -of the Software, and to permit persons to whom the Software is furnished to do -so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. \ No newline at end of file diff --git a/LICENSES/STATSMODELS_LICENSE.txt b/LICENSES/STATSMODELS_LICENSE.txt deleted file mode 100644 index 1d8094fcc..000000000 --- a/LICENSES/STATSMODELS_LICENSE.txt +++ /dev/null @@ -1,34 +0,0 @@ -Copyright (C) 2006, Jonathan E. Taylor -All rights reserved. - -Copyright (c) 2006-2008 Scipy Developers. -All rights reserved. - -Copyright (c) 2009-2018 statsmodels Developers. -All rights reserved. - - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - a. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - b. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - c. 
Neither the name of statsmodels nor the names of its contributors - may be used to endorse or promote products derived from this software - without specific prior written permission. - - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL STATSMODELS OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH -DAMAGE. \ No newline at end of file diff --git a/LICENSES/STREAMLIT_LICENSE.txt b/LICENSES/STREAMLIT_LICENSE.txt deleted file mode 100644 index f49a4e16e..000000000 --- a/LICENSES/STREAMLIT_LICENSE.txt +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. 
For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. 
Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. \ No newline at end of file diff --git a/LICENSES/YFINANCE_LICENSE.txt b/LICENSES/YFINANCE_LICENSE.txt deleted file mode 100644 index 7a4a3ea24..000000000 --- a/LICENSES/YFINANCE_LICENSE.txt +++ /dev/null @@ -1,202 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. 
- - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. 
Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. \ No newline at end of file diff --git a/README.md b/README.md index 928c7c261..09051d96e 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# PandasAI 🐼 +# ![PandasAI](images/logo.png) [![Release](https://img.shields.io/pypi/v/pandasai?label=Release&style=flat-square)](https://pypi.org/project/pandasai/) [![CI](https://github.com/gventuri/pandas-ai/actions/workflows/ci.yml/badge.svg)](https://github.com/gventuri/pandas-ai/actions/workflows/ci.yml/badge.svg) @@ -9,83 +9,81 @@ [![Downloads](https://static.pepy.tech/badge/pandasai)](https://pepy.tech/project/pandasai) [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1ZnO-njhL7TBOYPZaqvMvGtsjckZKrv2E?usp=sharing) -PandasAI is a Python library that adds Generative AI capabilities to [pandas](https://github.com/pandas-dev/pandas), the popular data analysis and manipulation tool. It is designed to be used in conjunction with pandas, and is not a replacement for it. +PandasAI is a Python library that makes it easy to ask questions to your data in natural language. It helps you to explore, clean, and analyze your data using generative AI. 
- +# 🔧 Getting started -![PandasAI](images/pandas-ai.png?raw=true) +The documentation for PandasAI to use it with specific LLMs, vector stores and connectors, can be found [here](https://pandas-ai.readthedocs.io/en/latest/). -## 🔧 Quick install +## 📦 Installation + +With pip: ```bash pip install pandasai ``` -## 🔍 Demo +With poetry: -Try out PandasAI in your browser: +```bash +poetry add pandasai +``` -[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1ZnO-njhL7TBOYPZaqvMvGtsjckZKrv2E?usp=sharing) +## 🔍 Demo -## 📖 Documentation +Try out PandasAI yourself in your browser: -The documentation for PandasAI can be found [here](https://pandas-ai.readthedocs.io/en/latest/). +[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1ZnO-njhL7TBOYPZaqvMvGtsjckZKrv2E?usp=sharing) -## 💻 Usage +# 🚀 Deploying PandasAI -> Disclaimer: GDP data was collected from [this source](https://ourworldindata.org/grapher/gross-domestic-product?tab=table), published by World Development Indicators - World Bank (2022.05.26) and collected at National accounts data - World Bank / OECD. It relates to the year of 2020. Happiness indexes were extracted from [the World Happiness Report](https://ftnnews.com/images/stories/documents/2020/WHR20.pdf). Another useful [link](https://data.world/makeovermonday/2020w19-world-happiness-report-2020). +PandasAI can be deployed in a variety of ways. You can easily use it in your Jupyter notebooks or streamlit apps, or you can deploy it as a REST API such as with FastAPI or Flask. -PandasAI is designed to be used in conjunction with pandas. It makes pandas conversational, allowing you to ask questions to your data in natural language. +If you are interested in managed PandasAI Cloud or self-hosted Enterprise Offering, take a look at [our website](https://pandas-ai.com) or [book a meeting with us](https://zcal.co/gventuri/pandas-ai-demo). 
-### Queries +## 💻 Usage -For example, you can ask PandasAI to find all the rows in a DataFrame where the value of a column is greater than 5, and it will return a DataFrame containing only those rows: +### Ask questions ```python import pandas as pd from pandasai import SmartDataframe # Sample DataFrame -df = pd.DataFrame({ +sales_by_country = pd.DataFrame({ "country": ["United States", "United Kingdom", "France", "Germany", "Italy", "Spain", "Canada", "Australia", "Japan", "China"], - "gdp": [19294482071552, 2891615567872, 2411255037952, 3435817336832, 1745433788416, 1181205135360, 1607402389504, 1490967855104, 4380756541440, 14631844184064], - "happiness_index": [6.94, 7.16, 6.66, 7.07, 6.38, 6.4, 7.23, 7.22, 5.87, 5.12] + "sales": [5000, 3200, 2900, 4100, 2300, 2100, 2500, 2600, 4500, 7000] }) # Instantiate a LLM from pandasai.llm import OpenAI llm = OpenAI(api_token="YOUR_API_TOKEN") -df = SmartDataframe(df, config={"llm": llm}) -df.chat('Which are the 5 happiest countries?') +df = SmartDataframe(sales_by_country, config={"llm": llm}) +df.chat('Which are the top 5 countries by sales?') ``` -The above code will return the following: - ``` -6 Canada -7 Australia -1 United Kingdom -3 Germany -0 United States -Name: country, dtype: object +China, United States, Japan, Germany, Australia ``` -Of course, you can also ask PandasAI to perform more complex queries. For example, you can ask PandasAI to find the sum of the GDPs of the 2 unhappiest countries: +--- + +Or you can ask more complex questions: ```python -df.chat('What is the sum of the GDPs of the 2 unhappiest countries?') +df.chat( + "What is the total sales for the top 3 countries by sales?" +) ``` -The above code will return the following: - ``` -19012600725504 +The total sales for the top 3 countries by sales is 16500. 
``` -### Charts +### Visualize charts -You can also ask PandasAI to draw a graph: +You can also ask PandasAI to generate charts for you: ```python df.chat( @@ -95,11 +93,9 @@ df.chat( ![Chart](images/histogram-chart.png?raw=true) -You can save any charts generated by PandasAI by setting the `save_charts` parameter to `True` in the `PandasAI` constructor. For example, `PandasAI(llm, save_charts=True)`. Charts are saved in `./pandasai/exports/charts` . - ### Multiple DataFrames -Additionally, you can also pass in multiple dataframes to PandasAI and ask questions relating them. +You can also pass in multiple dataframes to PandasAI and ask questions relating them. ```python import pandas as pd @@ -126,60 +122,35 @@ dl = SmartDatalake([employees_df, salaries_df], config={"llm": llm}) dl.chat("Who gets paid the most?") ``` -The above code will return the following: - ``` -Oh, Olivia gets paid the most. +Olivia gets paid the most. ``` You can find more examples in the [examples](examples) directory. -### ⚡️ Shortcuts - -PandasAI also provides a number of shortcuts (beta) to make it easier to ask questions to your data. For example, you can ask PandasAI to `clean_data`, `impute_missing_values`, `generate_features`, `plot_histogram`, and many many more. - -```python -# Clean data -df.clean_data() +## 🔒 Privacy & Security -# Impute missing values -df.impute_missing_values() +In order to generate the Python code to run, we take some random samples from the dataframe, we randomize it (using random generation for sensitive data and shuffling for non-sensitive data) and send just the randomized head to the LLM. -# Generate features -df.generate_features() +If you want to enforce further your privacy you can instantiate PandasAI with `enforce_privacy = True` which will not send the head (but just column names) to the LLM. 
-# Plot histogram -df.plot_histogram(column="gdp") -``` +``` ## 📜 License -Learn more about the shortcuts [here](https://pandas-ai.readthedocs.io/en/latest/shortcuts/). +PandasAI is available under the MIT expat license, except for the `pandasai/ee` directory (which has its [license here](https://github.com/Sinaptik-AI/pandas-ai/blob/master/pandasai/ee/LICENSE) if applicable). -## 🔒 Privacy & Security +If you are interested in managed PandasAI Cloud or self-hosted Enterprise Offering, take a look at [our website](https://pandas-ai.com) or [book a meeting with us](https://zcal.co/gventuri/pandas-ai-demo). -In order to generate the Python code to run, we take the dataframe head, we randomize it (using random generation for sensitive data and shuffling for non-sensitive data) and send just the head. +## Resources -Also, if you want to enforce further your privacy you can instantiate PandasAI with `enforce_privacy = True` which will not send the head (but just column names) to the LLM. +- [Docs](https://pandas-ai.readthedocs.io/en/latest/) for comprehensive documentation +- [Examples](examples) for example notebooks +- [Discord](https://discord.gg/kF7FqH2FwS) for discussion with the community and PandasAI team ## 🤝 Contributing -Contributions are welcome! Please check out the todos below, and feel free to open a pull request. -For more information, please see the [contributing guidelines](CONTRIBUTING.md). - -After installing the virtual environment, please remember to install `pre-commit` to be compliant with our standards: +Contributions are welcome! Please check the outstanding issues and feel free to open a pull request. +For more information, please check out the [contributing guidelines](CONTRIBUTING.md). -```bash -pre-commit install -``` - -## Contributors +### Thank you! [![Contributors](https://contrib.rocks/image?repo=gventuri/pandas-ai)](https://github.com/gventuri/pandas-ai/graphs/contributors) - -## 📜 License - -PandasAI is licensed under the MIT License. 
See the LICENSE file for more details. - -## Acknowledgements - -- This project is based on the [pandas](https://github.com/pandas-dev/pandas) library by independent contributors, but it's in no way affiliated with the pandas project. -- This project is meant to be used as a tool for data exploration and analysis, and it's not meant to be used for production purposes. Please use it responsibly. diff --git a/docs/API/helpers.md b/docs/API/helpers.md deleted file mode 100644 index 28f5c8427..000000000 --- a/docs/API/helpers.md +++ /dev/null @@ -1,10 +0,0 @@ -## Helpers -This module includes methods classified as helpers. - -### Anonymizer - -A collection of methods to help handle sensitive information - -::: pandasai.helpers.anonymizer - options: - show_root_heading: true diff --git a/docs/API/llms.md b/docs/API/llms.md deleted file mode 100644 index eebd86f67..000000000 --- a/docs/API/llms.md +++ /dev/null @@ -1,42 +0,0 @@ -## LLMs - -This document outlines the LLMs API wrappers included in the `pandasai`. - -### Base - -This is a base class to implement any LLM to be used with `pandasai` framework. - -::: pandasai.llm.base -options: -show_root_heading: true - -### OpenAI - -OpenAI API wrapper extended through BaseOpenAI class. - -::: pandasai.llm.openai -options: -show_root_heading: true - -### Azure OpenAI - -OpenAI API through Azure Platform wrapper - -::: pandasai.llm.azure_openai -options: -show_root_heading: true - -### GooglePalm - -GooglePalm class extended through BaseGoogle Class - -::: pandasai.llm.google_palm -options: -show_root_heading: true - -### Fake - -A test fake class -::: pandasai.llm.fake -options: -show_root_heading: true diff --git a/docs/API/pandasai.md b/docs/API/pandasai.md deleted file mode 100644 index c082ab34d..000000000 --- a/docs/API/pandasai.md +++ /dev/null @@ -1,25 +0,0 @@ -## PANDASAI - -This Section of API covers the BaseModule Implementation along with some Package Constants and Exceptions. 
- - -### Main - -The `init` of pandasai module contains the a high level wrapper to run the package. -::: pandasai - options: - show_root_heading: true - -### Constants -Some of the package level constants are defined here. -::: pandasai.constants - options: - show_root_heading: true - -### Exception Handling - -The pandasai specific Exception handling mechanism defined here. - -::: pandasai.exceptions - options: - show_root_heading: true diff --git a/docs/API/prompts.md b/docs/API/prompts.md deleted file mode 100644 index 5639e422c..000000000 --- a/docs/API/prompts.md +++ /dev/null @@ -1,27 +0,0 @@ -## Prompts - -This module includes some methods on optimally handling prompts when interacting with LLMs. - -### Base Prompt - -A base prompt - -::: pandasai.prompts.base -options: -show_root_heading: true - -### Generate Python Code - -A standard prompt is designed to be used when querying the LLMs to generate Python Code. - -::: pandasai.prompts.generate_python_code -options: -show_root_heading: true - -### Generate Python Code On Error - -A prompt to generate Python Code on Error - -::: pandasai.prompts.correct_error_prompt -options: -show_root_heading: true diff --git a/docs/LLMs/llms.md b/docs/LLMs/llms.md index 236fc9f23..f43e234cd 100644 --- a/docs/LLMs/llms.md +++ b/docs/LLMs/llms.md @@ -10,13 +10,41 @@ If the model expects one or more parameters, you can pass them to the constructo ```json { - "llm": "OpenAI", + "llm": "BambooLLM", "llm_options": { - "api_token": "API_TOKEN_GOES_HERE" + "api_key": "API_KEY_GOES_HERE" } } ``` +## BambooLLM + +BambooLLM is the state-of-the-art language model developed by [PandasAI](https://pandas-ai.com) with data analysis in mind. It is designed to understand and execute natural language queries related to data analysis, data manipulation, and data visualization. It's currently in closed beta and available only to a select group of users, but it will be available to the public soon. 
You can join the waitlist [here](https://docs.google.com/forms/d/1RvdGO6dmV9NY2EaNoxmitxQmRYh3oThVRznoXqJBSFI). + +```python +from pandasai import SmartDataframe +from pandasai.llm import BambooLLM + +llm = BambooLLM(api_key="my-bamboo-api-key") +df = SmartDataframe("data.csv", config={"llm": llm}) + +response = df.chat("Calculate the sum of the gdp of north american countries") +print(response) +``` + +As an alternative, you can set the `PANDASAI_API_KEY` environment variable and instantiate the `BambooLLM` object without passing the API key: + +```python +from pandasai import SmartDataframe +from pandasai.llm import BambooLLM + +llm = BambooLLM() # no need to pass the API key, it will be read from the environment variable +df = SmartDataframe("data.csv", config={"llm": llm}) + +response = df.chat("Calculate the sum of the gdp of north american countries") +print(response) +``` + ## OpenAI models In order to use OpenAI models, you need to have an OpenAI API key. You can get one [here](https://platform.openai.com/account/api-keys). diff --git a/docs/building_docs.md b/docs/building_docs.md deleted file mode 100644 index da5fc5fcf..000000000 --- a/docs/building_docs.md +++ /dev/null @@ -1,60 +0,0 @@ -Example: Building Docs with MkDocs -=================================== - -This example shows a basic MkDocs project with Read the Docs. This project is using `mkdocs` with `readthedocs` -project template. - -Some useful links are given below to lear and contribute in the project. - -📚 [docs/](https://github.com/readthedocs-examples/example-mkdocs-basic/blob/main/docs/)
-A basic MkDocs project lives in `docs/`, it was generated using MkDocs defaults. All the `*.md` make up sections in the documentation. - -⚙️ [.readthedocs.yaml](https://github.com/readthedocs-examples/example-mkdocs-basic/blob/main/.readthedocs.yaml)
-Read the Docs Build configuration is stored in `.readthedocs.yaml`. - -⚙️ [mkdocs.yml](https://github.com/readthedocs-examples/example-mkdocs-basic/blob/main/mkdocs.yml)
-A basic [MkDocs configuration](https://www.mkdocs.org/user-guide/configuration/) is stored here, including a few extensions for MkDocs and Markdown. Add your own configurations here, such as extensions and themes. Remember that many extensions and themes require additional Python packages to be installed. - -📍 [docs/requirements.txt](https://github.com/readthedocs-examples/example-mkdocs-basic/blob/main/docs/requirements.txt) and [docs/requirements.in](https://github.com/readthedocs-examples/example-mkdocs-basic/blob/main/docs/requirements.in)
-Python dependencies are [pinned](https://docs.readthedocs.io/en/latest/guides/reproducible-builds.html) (uses [pip-tools](https://pip-tools.readthedocs.io/en/latest/)) here. Make sure to add your Python dependencies to `requirements.txt` or if you choose [pip-tools](https://pip-tools.readthedocs.io/en/latest/), edit `docs/requirements.in` and remember to run to run `pip-compile docs/requirements.in`. - -Example Project usage ---------------------- - -`Poetry` is the package manager for `pandasai`. In order to build documentation, we have to add requirements in -development environment. - -This project has a standard MkDocs layout which is built by Read the Docs almost the same way that you would build it -locally (on your own laptop!). - -You can build and view this documentation project locally - we recommend that you activate a `Poetry` environment -and dependency management tool. -```console -# Install required Python dependencies (MkDocs etc.) -poetry install --with docs -# Run the mkdocs development server -mkdocs serve -``` - -Project Docs Structure ----------------------- -If you are new to Read the Docs, you may want to refer to the [Read the Docs User documentation](https://docs.readthedocs.io/). - -Below is the rundown of documentation structure for `pandasai`, you need to know: - -1. place your `docs/` folder alongside your Python project. -2. copy `mkdocs.yml`, `.readthedocs.yaml` and the `docs/` folder into your project root. -3. `docs/API` contains the API documentation created using `docstring`. For any new module, add the links here -4. Project is using standard Google Docstring Style. -5. Rebuild the documentation locally to see that it works. -6. Documentation are hosted on [Read the Docs tutorial](https://docs.readthedocs.io/en/stable/tutorial/) - -> Define the release version in `mkdocs.yml` file. 
- -Read the Docs tutorial ----------------------- - -To get started with Read the Docs, you may also refer to the -[Read the Docs tutorial](https://docs.readthedocs.io/en/stable/tutorial/). I - -With every release, build the documentation manually. diff --git a/docs/connectors.md b/docs/connectors.md index 9c335c429..e6f6c2719 100644 --- a/docs/connectors.md +++ b/docs/connectors.md @@ -27,7 +27,9 @@ PandasAI provides connectors for the following SQL databases: - Generic SQL - Snowflake - DataBricks +- GoogleBigQuery - Yahoo Finance +- Airtable Additionally, PandasAI provides a generic SQL connector that can be used to connect to any SQL database. @@ -143,13 +145,13 @@ sql_connector = SQLConnector( ## Snowflake connector -The Snowflake connector allows you to connect to Snowflake. It is very similar to the SQL connectors, but it has some differences. +The Snowflake connector allows you to connect to Snowflake. It is very similar to the SQL connectors, but it has some differences. The usage of this connector might be subject to a license ([check it out](https://github.com/Sinaptik-AI/pandas-ai/blob/master/pandasai/ee/LICENSE)). To use the Snowflake connector, you only need to import it into your Python code and pass it to a `SmartDataframe` or `SmartDatalake` object: ```python from pandasai import SmartDataframe -from pandasai.connectors import SnowFlakeConnector +from pandasai.ee.connectors import SnowFlakeConnector snowflake_connector = SnowFlakeConnector( config={ @@ -174,12 +176,12 @@ df.chat("How many records has status 'F'?") ## DataBricks connector -The DataBricks connector allows you to connect to DataBricks. It is very similar to the SQL connectors, but it has some differences. +The DataBricks connector allows you to connect to DataBricks. It is very similar to the SQL connectors, but it has some differences. The usage of this connector might be subject to a license ([check it out](https://github.com/Sinaptik-AI/pandas-ai/blob/master/pandasai/ee/LICENSE)). 
-To use the DataBricks connector, you only need to import it into your Python code and pass it to a `SmartDataframe` or `SmartDatalake` object: +To use the DataBricks connector, you only need to import it into your Python code and pass it to an `Agent`, `SmartDataframe` or `SmartDatalake` object: ```python -from pandasai.connectors import DatabricksConnector +from pandasai.ee.connectors import DatabricksConnector databricks_connector = DatabricksConnector( config={ @@ -198,6 +200,30 @@ databricks_connector = DatabricksConnector( ) ``` +## GoogleBigQuery connector + +The GoogleBigQuery connector allows you to connect to GoogleBigQuery datasets. It is very similar to the SQL connectors, but it has some differences. The usage of this connector might be subject to a license ([check it out](https://github.com/Sinaptik-AI/pandas-ai/blob/master/pandasai/ee/LICENSE)). + +To use the GoogleBigQuery connector, you only need to import it into your Python code and pass it to an `Agent`, `SmartDataframe` or `SmartDatalake` object: + +```python +from pandasai.connectors import GoogleBigQueryConnector + +bigquery_connector = GoogleBigQueryConnector( + config={ + "credentials_path" : "path to keyfile.json", + "database" : "dataset_name", + "table" : "table_name", + "projectID" : "Project_id_name", + "where": [ + # this is optional and filters the data to + # reduce the size of the dataframe + ["loan_status", "=", "PAIDOFF"], + ], + } +) +``` + ## Yahoo Finance connector The Yahoo Finance connector allows you to connect to Yahoo Finance, by simply passing the ticker symbol of the stock you want to analyze. @@ -218,7 +244,7 @@ df.chat("What is the closing price for yesterday?") The Airtable connector allows you to connect to Airtable Projects Tables, by simply passing the `base_id` , `token` and `table_name` of the table you want to analyze. 
-To use the Airtable connector, you only need to import it into your Python code and pass it to a `SmartDataframe` or `SmartDatalake` object: +To use the Airtable connector, you only need to import it into your Python code and pass it to an `Agent`, `SmartDataframe` or `SmartDatalake` object: ```python from pandasai.connectors import AirtableConnector diff --git a/docs/custom-instructions.md b/docs/custom-instructions.md deleted file mode 100644 index d5e3457a8..000000000 --- a/docs/custom-instructions.md +++ /dev/null @@ -1,15 +0,0 @@ -# Custom instructions - -In some cases, you may want to customize the instructions that are used by PandasAI. For example, you may want to use a different instruction for a specific use case to improve the results for certain types of queries. - -With PandasAI, you can easily customize the instructions that are used by the library. You can do this by passing a `custom_instructions` string in the config dictionary to the `SmartDataframe` constructor. - -## Example - -```python -from pandasai import SmartDataframe - -df = SmartDataframe("data.csv", config={ - "custom_instructions": "Custom instructions for the generation of Python code" -}) -``` diff --git a/docs/custom-prompts.md b/docs/custom-prompts.md deleted file mode 100644 index f385ff91c..000000000 --- a/docs/custom-prompts.md +++ /dev/null @@ -1,93 +0,0 @@ -# Custom prompts - -In some cases, you may want to customize the prompts that are used by PandasAI. For example, you may want to use a different prompt for a specific use case to improve the results for certain types of queries. - -With PandasAI, you can easily customize the prompts that are used by the library. You can do this by passing a `prompts` dictionary to the `PandasAI` constructor. The keys of the dictionary are the names of the prompts, and the values are the prompts themselves. 
- -There are 5 types of prompts that you can override at the moment: - -- `generate_python_code`: this is the prompt that is used to generate Python code from a natural language query. PandasAI uses this prompt as the standard prompt for the first query. - -- `correct_error`: this is the prompt that is used to correct the generated Python code. Whenever the code generated by PandasAI is not correct, an exception is raised and a new call to the LLM is made with this prompt to correct the error. - -## How to create custom prompts - -To create your custom prompt create a new CustomPromptClass inherited from base `Prompt` class. - -```python -from pandasai import SmartDataframe -from pandasai.prompts import AbstractPrompt - - -class MyCustomPrompt(AbstractPrompt): - @property - def template(self): - return """This is your custom text for your prompt with custom {my_custom_value}""" - - def setup(self, kwargs): - # This method is called before the prompt is initialized - # You can use it to setup your prompt and pass any additional - # variables to the template - self.set_var("my_custom_value", kwargs["my_custom_value"]) - - -df = SmartDataframe("data.csv", config={ - "custom_prompts": { - "generate_python_code": MyCustomPrompt( - my_custom_value="my custom value") - } -}) -``` - -You can also use `FileBasedPrompt` in case you prefer to store prompt template in a file: - -_my_prompt_template.tmpl:_ - -``` -This is your custom text for your prompt with custom {my_custom_value} -``` - -_python code:_ - -```python -from pandasai import SmartDataframe -from pandasai.prompts import FileBasedPrompt - - -class MyCustomFileBasedPrompt(FileBasedPrompt): - _path_to_template = "path/to/my_prompt_template.tmpl" - - -df = SmartDataframe("data.csv", config={ - "custom_prompts": { - "generate_python_code": MyCustomFileBasedPrompt( - my_custom_value="my custom value") - } -}) -``` - -## Using dynamic prompt values - -### Variable interpolation - -You can directly access the default 
prompt variables (for example dfs, conversation, etc) and call their methods from prompt text itself. - -```python -from pandasai import SmartDataframe -from pandasai.prompts import AbstractPrompt - - -class MyCustomPrompt(AbstractPrompt): - template = """You are given a dataframe with number if rows equal to {dfs[0].shape[0]} and number of columns equal to {dfs[0].shape[1]} - -Here's the conversation: -{conversation} -""" - - -df = SmartDataframe("data.csv", config={ - "custom_prompts": { - "generate_python_code": MyCustomPrompt() - } -}) -``` diff --git a/docs/examples.md b/docs/examples.md index bb82731a2..d34e1c9de 100644 --- a/docs/examples.md +++ b/docs/examples.md @@ -1,11 +1,11 @@ # Examples -Some examples of using PandasAI with different data sources. -Other [examples](../examples) are included in the repository along with samples of data. +Here are some examples of how to use PandasAI. +More [examples](https://github.com/Sinaptik-AI/pandas-ai/tree/main/examples) are included in the repository along with samples of data. 
## Working with pandas dataframes -Example of using PandasAI with a Pandas DataFrame +Using PandasAI with a Pandas DataFrame ```python from pandasai import SmartDataframe @@ -13,20 +13,19 @@ import pandas as pd from pandasai.llm import OpenAI # pandas dataframe -df = pd.DataFrame({ +sales_by_country = pd.DataFrame({ "country": ["United States", "United Kingdom", "France", "Germany", "Italy", "Spain", "Canada", "Australia", "Japan", "China"], - "gdp": [19294482071552, 2891615567872, 2411255037952, 3435817336832, 1745433788416, 1181205135360, 1607402389504, 1490967855104, 4380756541440, 14631844184064], - "happiness_index": [6.94, 7.16, 6.66, 7.07, 6.38, 6.4, 7.23, 7.22, 5.87, 5.12] + "sales": [5000, 3200, 2900, 4100, 2300, 2100, 2500, 2600, 4500, 7000] }) llm = OpenAI(api_token="YOUR_API_TOKEN") # convert to SmartDataframe -df = SmartDataframe(df, config={"llm": llm}) +df = SmartDataframe(sales_by_country, config={"llm": llm}) -response = df.chat('Calculate the sum of the gdp of north american countries') +response = df.chat('Which are the top 5 countries by sales?') print(response) -# Output: 20901884461056 +# Output: China, United States, Japan, Germany, Australia ``` ## Working with CSVs @@ -132,20 +131,16 @@ from pandasai.llm import OpenAI llm = OpenAI(api_token="YOUR_API_TOKEN") -# You can instantiate a SmartDataframe with a Polars DataFrame - -df = pd.DataFrame({ - "country": ["United States", "United Kingdom", "France", "Germany", "Italy", "Spain", "Canada", "Australia", - "Japan", "China"], - "gdp": [19294482071552, 2891615567872, 2411255037952, 3435817336832, 1745433788416, 1181205135360, 1607402389504, - 1490967855104, 4380756541440, 14631844184064], - "happiness_index": [6.94, 7.16, 6.66, 7.07, 6.38, 6.4, 7.23, 7.22, 5.87, 5.12] +sales_by_country = pd.DataFrame({ + "country": ["United States", "United Kingdom", "France", "Germany", "Italy", "Spain", "Canada", "Australia", "Japan", "China"], + "sales": [5000, 3200, 2900, 4100, 2300, 2100, 2500, 2600, 4500, 
7000] }) pandasai.set_pd_engine("modin") df = SmartDataframe(df, config={"llm": llm}) -response = df.chat("How many loans are from men and have been paid off?") +response = df.chat('Which are the top 5 countries by sales?') print(response) +# Output: China, United States, Japan, Germany, Australia # you can switch back to pandas using # pandasai.set_pd_engine("pandas") @@ -168,13 +163,10 @@ from pandasai.llm import OpenAI llm = OpenAI(api_token="YOUR_API_TOKEN") - # You can instantiate a SmartDataframe with a Polars DataFrame - -df = pl.DataFrame({ +sales_by_country = pl.DataFrame({ "country": ["United States", "United Kingdom", "France", "Germany", "Italy", "Spain", "Canada", "Australia", "Japan", "China"], - "gdp": [19294482071552, 2891615567872, 2411255037952, 3435817336832, 1745433788416, 1181205135360, 1607402389504, 1490967855104, 4380756541440, 14631844184064], - "happiness_index": [6.94, 7.16, 6.66, 7.07, 6.38, 6.4, 7.23, 7.22, 5.87, 5.12] + "sales": [5000, 3200, 2900, 4100, 2300, 2100, 2500, 2600, 4500, 7000] }) df = SmartDataframe(df, config={"llm": llm}) @@ -185,7 +177,7 @@ print(response) ## Plotting -Example of using PandasAI to generate a chart from a Pandas DataFrame +Example of using PandasAI to plot a chart from a Pandas DataFrame ```python from pandasai import SmartDataframe @@ -227,7 +219,7 @@ print(response) # Output: check out $pwd/exports/charts/{hashid}/chart.png ``` -## Working with multiple dataframes (with SmartDatalake) +## Working with multiple dataframes (using the SmartDatalake) Example of using PandasAI with multiple dataframes. In order to use multiple dataframes as a data source, you need to use a `SmartDatalake` instead of a `SmartDataframe`. You can instantiate a `SmartDatalake` as follows: @@ -258,42 +250,6 @@ print(response) # Output: Olivia gets paid the most. ``` -## Chain of commands - -You can chain commands by passing the output of one command to the next one. 
In the example, we first filter the original -dataframe by gender and then by loans that have been paid off. - -```python -from pandasai import SmartDataframe -from pandasai.llm import OpenAI - -llm = OpenAI(api_token="YOUR_API_TOKEN") -df = SmartDataframe("data/Loan payments data.csv", config={"llm": llm}) - -# We filter by males only -from_males_df = df.chat("Filter the dataframe by women") - -# We filter by loans that have been paid off -paid_from_males_df = from_males_df.chat("Filter the dataframe by loans that have been paid off") -print(paid_from_males_df) -# Output: -# [247 rows x 11 columns] -# Loan_ID loan_status Principal terms effective_date due_date paid_off_time past_due_days age education Gender -# 0 xqd20166231 PAIDOFF 1000 30 9/8/2016 10/7/2016 9/14/2016 19:31 NaN 45 High School or Below male -# 3 xqd20160004 PAIDOFF 1000 15 9/8/2016 9/22/2016 9/22/2016 20:00 NaN 27 college male -# 5 xqd20160706 PAIDOFF 300 7 9/9/2016 9/15/2016 9/9/2016 13:45 NaN 35 Master or Above male -# 6 xqd20160007 PAIDOFF 1000 30 9/9/2016 10/8/2016 10/7/2016 23:07 NaN 29 college male -# 7 xqd20160008 PAIDOFF 1000 30 9/9/2016 10/8/2016 10/5/2016 20:33 NaN 36 college male -# .. ... ... ... ... ... ... ... ... ... ... ... -# 294 xqd20160295 PAIDOFF 1000 30 9/14/2016 10/13/2016 10/13/2016 13:00 NaN 36 Bechalor male -# 296 xqd20160297 PAIDOFF 800 15 9/14/2016 9/28/2016 9/21/2016 4:42 NaN 27 college male -# 297 xqd20160298 PAIDOFF 1000 30 9/14/2016 10/13/2016 10/13/2016 9:00 NaN 29 High School or Below male -# 298 xqd20160299 PAIDOFF 1000 30 9/14/2016 10/13/2016 10/13/2016 9:00 NaN 40 High School or Below male -# 299 xqd20160300 PAIDOFF 1000 30 9/14/2016 10/13/2016 10/13/2016 11:00 NaN 28 college male - -# [247 rows x 11 columns] -``` - ## Working with Agent With the chat agent, you can engage in dynamic conversations where the agent retains context throughout the discussion. This enables you to have more interactive and meaningful exchanges. 
@@ -349,6 +305,29 @@ response = agent.explain() print(response) ``` +## Description for an Agent + +When you instantiate an agent, you can provide a description of the agent. This description will be used to describe the agent in the chat and to provide more context for the LLM about how to respond to queries. + +Some examples of descriptions can be: + +- You are a data analysis agent. Your main goal is to help non-technical users to analyze data +- Act as a data analyst. Every time I ask you a question, you should provide the code to visualize the answer using plotly + +```python +from pandasai import Agent + +from pandasai.llm.openai import OpenAI + +llm = OpenAI("YOUR_API_KEY") + +agent = Agent( + "data.csv", + config={"llm": llm}, + description="You are a data analysis agent. Your main goal is to help non-technical users to analyze data", +) +``` + ## Add Skills to the Agent You can add customs functions for the agent to use, allowing the agent to expand its capabilities. These custom functions can be seamlessly integrated with the agent's skills, enabling a wide range of user-defined operations. diff --git a/docs/fields-description.md b/docs/fields-description.md new file mode 100644 index 000000000..37508174b --- /dev/null +++ b/docs/fields-description.md @@ -0,0 +1,39 @@ +# Use custom field descriptions + +The `field_descriptions` is a dictionary attribute of the `BaseConnector` class. It is used to provide additional information or descriptions about each individual field in the data source. This can be useful for providing context or explanations for the data in each field, especially when the field names themselves are not self-explanatory. + +Here's an example of how you might use `field_descriptions`: + +```python +field_descriptions = { + 'user_id': 'The unique identifier for each user', + 'payment_id': 'The unique identifier for each payment', + 'payment_provider': 'The payment provider used for the payment (e.g. 
PayPal, Stripe, etc.)' +} +``` + +In this example, `user_id`, `payment_id`, and `payment_provider` are the names of the fields in the data source, and the corresponding values are descriptions of what each field represents. + +When initializing a `BaseConnector` instance (or any other connector), you can pass in this `field_descriptions` dictionary as an argument: + +```python +connector = BaseConnector(config, name='My Connector', field_descriptions=field_descriptions) +``` + +Another example using a pandas connector: + +```python +import pandas as pd +from pandasai.connectors import PandasConnector +from pandasai import SmartDataframe + +df = pd.DataFrame({ + 'user_id': [1, 2, 3], + 'payment_id': [101, 102, 103], + 'payment_provider': ['PayPal', 'Stripe', 'PayPal'] +}) +connector = PandasConnector(df, field_descriptions=field_descriptions) +sdf = SmartDataframe(connector) +sdf.chat("What is the most common payment provider?") +# Output: PayPal +``` diff --git a/docs/getting-started.md b/docs/getting-started.md index f9d72f733..af5211816 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -16,7 +16,7 @@ pip install pandasai ### Optional dependencies -To keep the package size small, we have decided to make some dependencies optional. To install `pandasai` with these extra dependencies, run: +In order to keep the installation size small, `pandasai` does not include all the dependencies that it supports by default. You can install the extra dependencies by running the following command: ```console pip install pandasai[extra-dependency-name] @@ -24,7 +24,7 @@ pip install pandasai[extra-dependency-name] You can replace `extra-dependency-name` with any of the following: -- `google-aip`: this extra dependency is required if you want to use Google PaLM as a language model. +- `google-ai`: this extra dependency is required if you want to use Google PaLM as a language model. 
- `google-sheet`: this extra dependency is required if you want to use Google Sheets as a data source. - `excel`: this extra dependency is required if you want to use Excel files as a data source. - `modin`: this extra dependency is required if you want to use Modin dataframes as a data source. @@ -36,92 +36,95 @@ You can replace `extra-dependency-name` with any of the following: - `plotly`: this extra dependency is required if you want to support plotly for plotting. - `statsmodels`: this extra dependency is required if you want to support statsmodels. - `scikit-learn`: this extra dependency is required if you want to support scikit-learn. -- `streamlit`: this extra dependency is required if you want to support the streamlit. +- `streamlit`: this extra dependency is required if you want to support streamlit. ## SmartDataframe -Below is simple example to get started with `pandasai`. +The `SmartDataframe` class is the main class of `pandasai`. It is used to interact with a single dataframe. Below is a simple example to get started with `pandasai`. 
```python import pandas as pd from pandasai import SmartDataframe from pandasai.llm import OpenAI - # Sample DataFrame -df = pd.DataFrame({ +sales_by_country = pd.DataFrame({ "country": ["United States", "United Kingdom", "France", "Germany", "Italy", "Spain", "Canada", "Australia", "Japan", "China"], - "gdp": [19294482071552, 2891615567872, 2411255037952, 3435817336832, 1745433788416, 1181205135360, 1607402389504, 1490967855104, 4380756541440, 14631844184064], - "happiness_index": [6.94, 7.16, 6.66, 7.07, 6.38, 6.4, 7.23, 7.22, 5.87, 5.12] + "sales": [5000, 3200, 2900, 4100, 2300, 2100, 2500, 2600, 4500, 7000] }) # Instantiate a LLM llm = OpenAI(api_token="YOUR_API_TOKEN") -df = SmartDataframe(df, config={"llm": llm}) -df.chat('Which are the 5 happiest countries?') -# Output: United Kingdom, Canada, Australia, United States, Germany +df = SmartDataframe(sales_by_country, config={"llm": llm}) +df.chat('Which are the top 5 countries by sales?') +# Output: China, United States, Japan, Germany, Australia ``` If you want to learn more about the `SmartDataframe` class, check out this video: -[![Intro to SmartDataframe](https://cdn.loom.com/sessions/thumbnails/1ec1b8fbaa0e4ae0ab99b728b8b05fdb-00001.jpg)](https://www.loom.com/embed/1ec1b8fbaa0e4ae0ab99b728b8b05fdb?sid=7370854b-57c3-4f00-801b-69811a98d970 "Intro to SmartDataframe") +[![Intro to SmartDataframe](https://cdn.loom.com/sessions/thumbnails/1ec1b8fbaa0e4ae0ab99b728b8b05fdb-00001.jpg)](https://www.loom.com/embed/1ec1b8fbaa0e4ae0ab99b728b8b05fdb?sid=7370854b-57c3-4f00-801b-69811a98d970 "Intro to the SmartDataframe") ### How to generate an OpenAI API Token -Users are required to generate `YOUR_API_TOKEN`. Follow these simple steps to generate `YOUR_API_TOKEN` with -[openai](https://platform.openai.com/overview). +In order to use the OpenAI language model, users are required to generate a token. Follow these simple steps to generate a token with [openai](https://platform.openai.com/overview): 1. 
Go to https://openai.com/api/ and signup with your email address or connect your Google Account. 2. Go to View API Keys on left side of your Personal Account Settings. 3. Select Create new Secret key. > The API access to OPENAI is a paid service. You have to set up billing. -> Read the [Pricing](https://platform.openai.com/docs/quickstart/pricing) information before experimenting. +> Make sure you read the [Pricing](https://platform.openai.com/docs/quickstart/pricing) information before experimenting. -### Passing name and description +### Passing name and description for a dataframe -Sometimes, to help the LLM to work better, you might want to pass a name and a description of the dataframe. -You can do so as follows: +Sometimes, in order to help the LLM to work better, you might want to pass a name and a description of the dataframe. You can do this as follows: ```python -df = SmartDataframe(df, name="My DataFrame", description="This is my DataFrame") +df = SmartDataframe(df, name="My DataFrame", description="Brief description of what the dataframe contains") ``` ## SmartDatalake -PandasAI also supports queries with multiple dataframes. To perform such queries, you can use a `SmartDatalake` instead of a `SmartDataframe`. A `SmartDatalake` is a collection of `SmartDataframe`s. You can instantiate a `SmartDatalake` as follows: +PandasAI also supports queries with multiple dataframes. To perform such queries, you can use a `SmartDatalake` instead of a `SmartDataframe`. 
+ +Similarly to a `SmartDataframe`, you can instantiate a `SmartDatalake` as follows: ```python -from pandasai import SmartDatalake import pandas as pd +from pandasai import SmartDatalake +from pandasai.llm import OpenAI -# Sample DataFrames -df1 = pd.DataFrame({ - "country": ["United States", "United Kingdom", "France", "Germany", "Italy", "Spain", "Canada", "Australia", "Japan", "China"], - "gdp": [19294482071552, 2891615567872, 2411255037952, 3435817336832, 1745433788416, 1181205135360, 1607402389504, 1490967855104, 4380756541440, 14631844184064], - "happiness_index": [6.94, 7.16, 6.66, 7.07, 6.38, 6.4, 7.23, 7.22, 5.87, 5.12] -}) -df2 = "data/Loan payments data.csv" -df3 = "data/Loan payments data.xlsx" +employees_data = { + 'EmployeeID': [1, 2, 3, 4, 5], + 'Name': ['John', 'Emma', 'Liam', 'Olivia', 'William'], + 'Department': ['HR', 'Sales', 'IT', 'Marketing', 'Finance'] +} -dl = SmartDatalake([df1, df2, df3]) -``` +salaries_data = { + 'EmployeeID': [1, 2, 3, 4, 5], + 'Salary': [5000, 6000, 4500, 7000, 5500] +} -Then, similar to how you would use a `SmartDataframe`, you can use the `SmartDatalake` as follows: +employees_df = pd.DataFrame(employees_data) +salaries_df = pd.DataFrame(salaries_data) -```python -dl.chat('Which are the 5 happiest countries?') -# Output: United Kingdom, Canada, Australia, United States, Germany + +llm = OpenAI() +dl = SmartDatalake([employees_df, salaries_df], config={"llm": llm}) +dl.chat("Who gets paid the most?") +# Output: Olivia gets paid the most ``` PandasAI will automatically figure out which dataframe or dataframes are relevant to the query and will use only those dataframes to answer the query. 
-[![Intro to SmartDatalake](https://cdn.loom.com/sessions/thumbnails/a2006ac27b0545189cb5b9b2e011bc72-00001.jpg)](https://www.loom.com/share/a2006ac27b0545189cb5b9b2e011bc72 "Intro to SmartDatalake") +[![Intro to the SmartDatalake](https://cdn.loom.com/sessions/thumbnails/a2006ac27b0545189cb5b9b2e011bc72-00001.jpg)](https://www.loom.com/share/a2006ac27b0545189cb5b9b2e011bc72 "Intro to SmartDatalake") ## Agent -PandasAI also supports agents. While a `SmartDataframe` or a `SmartDatalake` can be used to answer a single query and are meant to be used in a single session and for exploratory data analysis, an agent can be used for multi-turn conversations and for production use cases. You can instantiate an agent as follows: +While a `SmartDataframe` or a `SmartDatalake` can be used to answer a single query and are meant to be used in a single session and for exploratory data analysis, an agent can be used for multi-turn conversations. + +To instantiate an agent, you can use the following code: ```python from pandasai import Agent @@ -130,29 +133,24 @@ from pandasai.llm import OpenAI # Sample DataFrames -df1 = pd.DataFrame({ +sales_by_country = pd.DataFrame({ "country": ["United States", "United Kingdom", "France", "Germany", "Italy", "Spain", "Canada", "Australia", "Japan", "China"], - "gdp": [19294482071552, 2891615567872, 2411255037952, 3435817336832, 1745433788416, 1181205135360, 1607402389504, 1490967855104, 4380756541440, 14631844184064], - "happiness_index": [6.94, 7.16, 6.66, 7.07, 6.38, 6.4, 7.23, 7.22, 5.87, 5.12] + "sales": [5000, 3200, 2900, 4100, 2300, 2100, 2500, 2600, 4500, 7000], + "deals_opened": [142, 80, 70, 90, 60, 50, 40, 30, 110, 120], + "deals_closed": [120, 70, 60, 80, 50, 40, 30, 20, 100, 110] }) llm = OpenAI(api_token="YOUR_API_TOKEN") - -agent = Agent([df1], config={"llm": llm}) -``` - -Then, you can use the agent as follows: - -```python -agent.chat('Which are the 5 happiest countries?') -# Output: United Kingdom, Canada, Australia, United 
States, Germany +agent = Agent([sales_by_country], config={"llm": llm}) +agent.chat('Which are the top 5 countries by sales?') +# Output: China, United States, Japan, Germany, Australia ``` Contrary to a `SmartDataframe` or a `SmartDatalake`, an agent will keep track of the state of the conversation and will be able to answer multi-turn conversations. For example: ```python -agent.chat('And what is the GDP of these countries?') -# Output: 2891615567872, 1607402389504, 1490967855104, 19294482071552, 3435817336832 +agent.chat('And which one has the most deals?') +# Output: United States has the most deals ``` ### Clarification questions @@ -167,7 +165,7 @@ this will return up to 3 clarification questions that the agent can ask to the u ### Explanation -An agent will also be able to explain the answer to the user. For example: +An agent will also be able to explain the answer given to the user. For example: ```python response = agent.chat('What is the GDP of the United States?') @@ -184,15 +182,13 @@ Rephrase question to get accurate and comprehensive response from the model. For ```python rephrased_query = agent.rephrase_query('What is the GDP of the United States?') -print("The answer is", rephrased_query) +print("The rephrased query is", rephrased_query) ``` ## Config -When you instantiate a `SmartDataframe`, you can pass a `config` object as the second argument. This object can contain custom settings that will be used by `pandasai` when generating code. - -As an alternative, you can simply edit the `pandasai.json` file in the root of your project. This file will be automatically loaded by `pandasai` and these will be the default settings. You will still be able to override these settings by passing the settings that you want to override when instantiating a `SmartDataframe`. +To customize PandasAI's `SmartDataframe`, you can either pass a `config` object with specific settings upon instantiation or modify the `pandasai.json` file in your project's root. 
The latter serves as the default configuration but can be overridden by directly specifying settings in the `config` object at creation. This approach ensures flexibility and precision in how PandasAI handles your data. Settings: @@ -203,11 +199,10 @@ Settings: - `enforce_privacy`: whether to enforce privacy. Defaults to `False`. If set to `True`, PandasAI will not send any data to the LLM, but only the metadata. By default, PandasAI will send 5 samples that are anonymized to improve the accuracy of the results. - `save_charts`: whether to save the charts generated by PandasAI. Defaults to `False`. You will find the charts in the root of your project or in the path specified by `save_charts_path`. - `save_charts_path`: the path where to save the charts. Defaults to `exports/charts/`. You can use this setting to override the default path. -- `open_charts`: whether to open the chart during parsing of the response from the LLM. Defaults to `True`. You can completely disable displaying of charts by setting this option to `False`. +- `open_charts`: whether to open the chart during parsing of the response from the LLM. Defaults to `True`. You can completely disable displaying of charts by setting this option to `False`. - `enable_cache`: whether to enable caching. Defaults to `True`. If set to `True`, PandasAI will cache the results of the LLM to improve the response time. If set to `False`, PandasAI will always call the LLM. - `use_error_correction_framework`: whether to use the error correction framework. Defaults to `True`. If set to `True`, PandasAI will try to correct the errors in the code generated by the LLM with further calls to the LLM. If set to `False`, PandasAI will not try to correct the errors in the code generated by the LLM. - `max_retries`: the maximum number of retries to use when using the error correction framework. Defaults to `3`. You can use this setting to override the default number of retries. -- `custom_prompts`: the custom prompts to use. 
Defaults to `{}`. You can use this setting to override the default custom prompts. You can find more information about custom prompts [here](custom-prompts.md). - `custom_whitelisted_dependencies`: the custom whitelisted dependencies to use. Defaults to `{}`. You can use this setting to override the default custom whitelisted dependencies. You can find more information about custom whitelisted dependencies [here](custom-whitelisted-dependencies.md). ## Demo in Google Colab @@ -216,7 +211,6 @@ Try out PandasAI in your browser: [![Open in Colab](https://camo.githubusercontent.com/84f0493939e0c4de4e6dbe113251b4bfb5353e57134ffd9fcab6b8714514d4d1/68747470733a2f2f636f6c61622e72657365617263682e676f6f676c652e636f6d2f6173736574732f636f6c61622d62616467652e737667)](https://colab.research.google.com/drive/1ZnO-njhL7TBOYPZaqvMvGtsjckZKrv2E?usp=sharing) -## Examples - -You can find some examples [here](examples.md). +## Other Examples +You can find all the other examples [here](examples.md). diff --git a/docs/images/histogram-chart.png b/docs/images/histogram-chart.png deleted file mode 100644 index 0d6447b1e..000000000 Binary files a/docs/images/histogram-chart.png and /dev/null differ diff --git a/docs/images/pandas-ai.png b/docs/images/pandas-ai.png deleted file mode 100644 index fb6033bba..000000000 Binary files a/docs/images/pandas-ai.png and /dev/null differ diff --git a/docs/index.md b/docs/index.md index f3ffbd9fa..e9ded5348 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,4 +1,4 @@ -# 🐼 PandasAI +# ![PandasAI](https://github.com/Sinaptik-AI/pandas-ai/blob/main/images/logo.png?raw=true) [![Release](https://img.shields.io/pypi/v/pandasai?label=Release&style=flat-square)](https://pypi.org/project/pandasai/) [![CI](https://github.com/gventuri/pandas-ai/actions/workflows/ci.yml/badge.svg)](https://github.com/gventuri/pandas-ai/actions/workflows/ci.yml/badge.svg) @@ -9,32 +9,31 @@ [![Downloads](https://static.pepy.tech/badge/pandasai)](https://pepy.tech/project/pandasai) 
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1ZnO-njhL7TBOYPZaqvMvGtsjckZKrv2E?usp=sharing) -PandasAI is a Python library that adds Generative AI capabilities to pandas, the popular data analysis and manipulation tool. It is designed to be used in conjunction with pandas, and is not a replacement for it. +PandasAI is a Python library that makes it easy to ask questions to your data (CSV, XLSX, PostgreSQL, MySQL, BigQuery, Databricks, Snowflake, etc.) in natural language. It helps you to explore, clean, and analyze your data using generative AI. -PandasAI makes pandas (and all the most used data analyst libraries) conversational, allowing you to ask questions to your data in natural language. For example, you can ask PandasAI to find all the rows in a DataFrame where the value of a column is greater than 5, and it will return a DataFrame containing only those rows. +Beyond querying, PandasAI offers functionalities to visualize data through graphs, cleanse datasets by addressing missing values, and enhance data quality through feature generation, making it a comprehensive tool for data scientists and analysts. -You can also ask PandasAI to draw graphs, clean data, impute missing values, and generate features. +## Features -## What are the value props of PandasAI? - -PandasAI provides two main value props: - -- **Ease of use:** PandasAI is designed to be easy to use, even if you are not familiar with generative AI or with `pandas`. You can simply ask questions to your data in natural language, and PandasAI will generate the code to answer your question. -- **Power:** PandasAI can be used to perform a wide variety of tasks, including data exploration, analysis, visualization, cleaning, imputation, and feature engineering. 
+- **Natural language querying**: Ask questions to your data in natural language. +- **Data visualization**: Generate graphs and charts to visualize your data. +- **Data cleansing**: Cleanse datasets by addressing missing values. +- **Feature generation**: Enhance data quality through feature generation. +- **Data connectors**: Connect to various data sources like CSV, XLSX, PostgreSQL, MySQL, BigQuery, Databricks, Snowflake, etc. ## How does PandasAI work? -PandasAI works by using a generative AI model to generate Python code. When you ask PandasAI a question, the model will first try to understand the question. Then, it will generate the Python code that would answer the question. Finally, the code will be executed, and the results will be returned to you. +PandasAI uses a generative AI model to understand and interpret natural language queries and translate them into Python code and SQL queries. It then uses the code to interact with the data and return the results to the user. ## Who should use PandasAI? -PandasAI is a good choice for anyone who wants to make their data analysis and manipulation workflow more efficient. It is especially useful for people who are not familiar with `pandas`, but also for people who are familiar with it and want to make their workflow more efficient. +PandasAI is designed for data scientists, analysts, and engineers who want to interact with their data in a more natural way. It is particularly useful for those who are not familiar with SQL or Python or who want to save time and effort when working with data. It is also useful for those who are familiar with SQL and Python, as it allows them to ask questions to their data without having to write any complex code. ## How to get started with PandasAI? To get started with PandasAI, you first need to install it. 
You can do this by running the following command: -```console +```bash # Using poetry (recommended) poetry add pandasai @@ -42,42 +41,31 @@ poetry add pandasai pip install pandasai ``` -Once you have installed PandasAI, you can start using it by importing it into your Python code. -Now you can start asking questions to your data in natural language. For example, the following code will ask PandasAI to find all the rows in a DataFrame where the value of the `gdp` column is greater than 5: +Once you have installed PandasAI, you can start using it by importing the `SmartDataframe` class and instantiating it with your data. You can then use the `chat` method to ask questions to your data in natural language. ```python import pandas as pd from pandasai import SmartDataframe -df = pd.DataFrame({ - "country": [ - "United States", "United Kingdom", "France", "Germany", "Italy", "Spain", "Canada", "Australia", "Japan", "China"], - "gdp": [ - 19294482071552, 2891615567872, 2411255037952, 3435817336832, 1745433788416, 1181205135360, 1607402389504, 1490967855104, 4380756541440, 14631844184064 - ], +# Sample DataFrame +sales_by_country = pd.DataFrame({ + "country": ["United States", "United Kingdom", "France", "Germany", "Italy", "Spain", "Canada", "Australia", "Japan", "China"], + "sales": [5000, 3200, 2900, 4100, 2300, 2100, 2500, 2600, 4500, 7000] }) # Instantiate a LLM from pandasai.llm import OpenAI -llm = OpenAI(api_token="YOUR_API_TOKEN") # Get API token from https://platform.openai.com/account/api-keys - -df = SmartDataframe(df, config={"llm": llm}) -df.chat('Which are the countries with GDP greater than 3000000000000?') -# Output: -# 0 United States -# 3 Germany -# 8 Japan -# 9 China -# Name: country, dtype: object -``` +llm = OpenAI(api_token="YOUR_API_TOKEN") -This will return a DataFrame containing only the rows where the value of the `gdp` column is greater than 5. 
- - +df = SmartDataframe(sales_by_country, config={"llm": llm}) +df.chat('Which are the top 5 countries by sales?') +## Output +# China, United States, Japan, Germany, Australia +``` ## Demo -Try out PandasAI in your browser: +Try out PandasAI yourself in your browser: [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1ZnO-njhL7TBOYPZaqvMvGtsjckZKrv2E?usp=sharing) @@ -87,4 +75,6 @@ If you have any questions or need help, please join our **[discord server](https ## License -PandasAI is licensed under the MIT License. See the LICENSE file for more details. +PandasAI is available under the MIT expat license, except for the `pandasai/ee` directory (which has its [license here](https://github.com/Sinaptik-AI/pandas-ai/blob/master/pandasai/ee/LICENSE)) if applicable. + +If you are interested in managed PandasAI Cloud or self-hosted Enterprise Offering, take a look at [our website](https://pandas-ai.com) or [book a meeting with us](https://zcal.co/gventuri/pandas-ai-demo). diff --git a/docs/pipelines/pipelines.md b/docs/pipelines/pipelines.md index 990545e74..3fd138cf7 100644 --- a/docs/pipelines/pipelines.md +++ b/docs/pipelines/pipelines.md @@ -15,9 +15,9 @@ PandasAI provides the following core pipeline logic units that can be composed t PandasAI provides the following predefined pipelines that combine logic units: -### GenerateSmartDataLakePipeline +### GenerateChatPipeline -The `GenerateSmartDataLakePipeline` generates new data in a SmartDatalake. It chains together logic units for: +The `GenerateChatPipeline` generates new data in an Agent. It chains together logic units for: - `CacheLookup` - Checking if data is cached - `PromptGeneration` - Generating prompt @@ -27,15 +27,6 @@ The `GenerateSmartDataLakePipeline` generates new data in a SmartDatalake. 
It ch - `ResultValidation` - Validating execution result - `ResultParsing` - Parsing result into data -### GenerateSDFPipeline - -The `GenerateSDFPipeline` generates a new synthetic dataframe by chaining logic units: - -- `SyntheticDataframePrompt` - Generating dataframe prompt -- `PromptExecution` - Executing prompt -- `SDFCodeExecutor` - Executing generated code -- `ProcessOutput` - Post-processing dataframe - ## Custom Pipelines Custom pipelines can be created by composing `BaseLogicUnit` implementations: diff --git a/docs/release-notes.md b/docs/release-notes.md deleted file mode 100644 index b421779bb..000000000 --- a/docs/release-notes.md +++ /dev/null @@ -1,10 +0,0 @@ -# Release Process -At the moment, the release process is manual. We try to make frequent releases. Usually, we release a new version -when we have a new feature or bugfix. A developer with admin rights to the repository will create a new release on -GitHub, and then publish the new version to PyPI. - -## Project Documentation - -The release of project documentation is also a manual process and hosted on `readthedocs` server. - -> **NOTE:** This project is under active development! \ No newline at end of file diff --git a/docs/save-dataframes.md b/docs/save-dataframes.md deleted file mode 100644 index 1ce9f79be..000000000 --- a/docs/save-dataframes.md +++ /dev/null @@ -1,35 +0,0 @@ -# Save and load dataframes - -In some cases, you might want to save the configuration of a `SmartDataframe` (including the name, the description, the file path and the sample head, if any). 
You can do so by calling the `save` method of the `SmartDataframe` as follows: - -```python -from pandasai import SmartDataframe -import pandas as pd - -# head df -head_df = pd.DataFrame({ - "country": ["United States", "United Kingdom", "France", "Germany", "Italy", "Spain", "Canada", "Australia", "Japan", "China"], - "gdp": [19294482071552, 2891615567872, 2411255037952, 3435817336832, 1745433788416, 1181205135360, 1607402389504, 1490967855104, 4380756541440, 14631844184064], - "happiness_index": [6.94, 7.16, 6.66, 7.07, 6.38, 6.4, 7.23, 7.22, 5.87, 5.12] -}) - -df = SmartDataframe( - "data/country_gdp.csv", - name="Country GDP", - description="A dataset containing the GDP of countries", - custom_head=head_df -) -df.save("country") -``` - -From now on, you will be able to instantiate your smart dataframe without having to pass the configuration again, like this: - -```python -from pandasai import SmartDataframe - -df = SmartDataframe("country") -``` - -If you don't pass any argument to the `save` method, the name will be equals to the `name` param of the dataframe. - -The configurations that you save are stored in the `pandasai.json` file, which is located in the root of your project. diff --git a/docs/shortcuts.md b/docs/shortcuts.md deleted file mode 100644 index 75fc39e44..000000000 --- a/docs/shortcuts.md +++ /dev/null @@ -1,151 +0,0 @@ -# Shortcuts - -Shortcuts are a way to quickly access the most common queries. At the moment, shortcuts are in beta, and only a few are available. More will be added in the future. - -## Available shortcuts - -### clean_data - -```python -df = SmartDataframe('data.csv') -df.clean_data() -``` - -This shortcut will do data cleaning on the data frame. - -### impute_missing_values - -```python -df = SmartDataframe('data.csv') -df.impute_missing_values() -``` - -This shortcut will impute missing values in the data frame. 
- -### generate_features - -```python -df = SmartDataframe('data.csv') -df.generate_features() -``` - -This shortcut will generate features in the data frame. - -### plot_pie_chart - -```python -df = SmartDataframe('data.csv') -df.plot_pie_chart(labels = ['a', 'b', 'c'], values = [1, 2, 3]) -``` - -This shortcut will plot a pie chart of the data frame. - -### plot_bar_chart - -```python -df = SmartDataframe('data.csv') -df.plot_bar_chart(x = ['a', 'b', 'c'], y = [1, 2, 3]) -``` - -This shortcut will plot a bar chart of the data frame. - -### plot_histogram - -```python -df = SmartDataframe('data.csv') -df.plot_histogram(column = 'a') -``` - -This shortcut will plot a histogram of the data frame. - -### plot_line_chart - -```python -df = SmartDataframe('data.csv') -df.plot_line_chart(x = ['a', 'b', 'c'], y = [1, 2, 3]) -``` - -This shortcut will plot a line chart of the data frame. - -### plot_scatter_chart - -```python -df = SmartDataframe('data.csv') -df.plot_scatter_chart(x = ['a', 'b', 'c'], y = [1, 2, 3]) -``` - -This shortcut will plot a scatter chart of the data frame. - -### plot_correlation_heatmap - -```python -df = SmartDataframe('data.csv') -df.plot_correlation_heatmap(df) -``` - -This shortcut will plot a correlation heatmap of the data frame. - -### plot_confusion_matrix - -```python -df = SmartDataframe('data.csv') -df.plot_confusion_matrix(y_true = [1, 2, 3], y_pred = [1, 2, 3]) -``` - -This shortcut will plot a confusion matrix of the data frame. - -### plot_roc_curve - -```python -df = SmartDataframe('data.csv') -df.plot_roc_curve(y_true = [1, 2, 3], y_pred = [1, 2, 3]) -``` - -This shortcut will plot a ROC curve of the data frame. - -### boxplot - -```python -df = SmartDataframe('data.csv') -df.boxplot(col='A', by='B', style='Highlight outliers with a x') -``` - -This shortcut plots a box-and-whisker plot using the DataFrame `df`, focusing on the `'A'` column and grouping the data by the `'B'` column. 
- -The `style` parameter allows users to communicate their desired plot customizations to the Language Model, providing flexibility for further refinement and adaptability to specific visual requirements. - -### rolling_mean - -```python -df = SmartDataframe('data.csv') -df.rolling_mean(column = 'a', window = 5) -``` - -This shortcut will calculate the rolling mean of the data frame. - -### rolling_median - -```python -df = SmartDataframe('data.csv') -df.rolling_median(column = 'a', window = 5) -``` - -This shortcut will calculate the rolling median of the data frame. - -### rolling_std - -```python -df = SmartDataframe('data.csv') -df.rolling_std(column = 'a', window = 5) -``` - -This shortcut will calculate the rolling standard deviation of the data frame. - -### segment_customers - -```python -df = SmartDataframe('data.csv') -df.segment_customers(features = ['a', 'b', 'c'], n_clusters = 5) -``` - -This shortcut will segment customers in the data frame. diff --git a/docs/train.md b/docs/train.md new file mode 100644 index 000000000..9ad41b3b0 --- /dev/null +++ b/docs/train.md @@ -0,0 +1,63 @@ +# Train with your own settings + +You can train PandasAI to understand your data better and to improve its performance. Training is as easy as calling the `train` method on the `SmartDataframe`, `SmartDatalake` or `Agent`. + +There are two kinds of training: + +- instructions training +- q/a training + + +
+ +## Instructions training + +Instructions training is used to teach PandasAI how you expect it to respond to certain queries. You can provide generic instructions about how you expect the model to approach certain types of queries, and PandasAI will use these instructions to generate responses to similar queries. + +For example, you might want the LLM to be aware that your company's fiscal year starts in April, or about specific ways you want to handle missing data. Or you might want to teach it about specific business rules or data analysis best practices that are specific to your organization. + +To train PandasAI with instructions, you can use the `train` method on the `Agent`, `SmartDataframe` or `SmartDatalake`, as follows: + +```python +from pandasai import Agent + +df = Agent("data.csv") +df.train(docs="The fiscal year starts in April") + +response = df.chat("What is the total sales for the fiscal year?") +print(response) +# The model will use the information provided in the training to generate a response +``` + +Your training data is persisted, so you only need to train the model once. + +## Q/A training + +Q/A training is used to teach PandasAI the desired process to answer specific questions, enhancing the model's performance and determinism. One of the biggest challenges with LLMs is that they are not deterministic, meaning that the same question can produce different answers at different times. Q/A training can help to mitigate this issue. + +To train PandasAI with Q/A, you can use the `train` method on the `Agent`, `SmartDataframe` or `SmartDatalake`, as follows: + +```python +from pandasai import Agent + +df = Agent("data.csv") + +# Train the model +query = "What is the total sales for the current fiscal year?" 
+response = """ +import pandas as pd + +df = dfs[0] + +# Calculate the total sales for the current fiscal year +total_sales = df[df['date'] >= pd.to_datetime('today').replace(month=4, day=1)]['sales'].sum() +result = { "type": "number", "value": total_sales } +""" +df.train(queries=[query], codes=[response]) + +response = df.chat("What is the total sales for the last fiscal year?") +print(response) +# The model will use the information provided in the training to generate a response +``` + +Also in this case, your training data is persisted, so you only need to train the model once. diff --git a/examples/agent.py b/examples/agent.py index c3847269a..095a1ab96 100644 --- a/examples/agent.py +++ b/examples/agent.py @@ -34,3 +34,21 @@ # Explain how the chat response is generated response = agent.explain() print(response) + + +# Train with data +queries = [ + "Display the distribution of ages in the population.", + "Visualize the distribution of product ratings.", + "Show the distribution of household incomes in a region.", +] + +codes = [ + "display_age_distribution()", + "visualize_product_ratings_distribution()", + "show_household_incomes_distribution_in_region()", +] + +agent.train(queries, codes) + +print("Done") diff --git a/examples/from_airtable.py b/examples/from_airtable.py index af1dd3a13..cf604ec09 100644 --- a/examples/from_airtable.py +++ b/examples/from_airtable.py @@ -1,4 +1,4 @@ -from pandasai import SmartDataframe +from pandasai import Agent from pandasai.connectors import AirtableConnector from pandasai.llm import OpenAI @@ -16,7 +16,7 @@ ) llm = OpenAI("OPENAI_API_KEY") -df = SmartDataframe(airtable_connectors, config={"llm": llm}) +df = Agent([airtable_connectors], config={"llm": llm}) response = df.chat("How many rows are there in data ?") print(response) diff --git a/examples/from_csv.py b/examples/from_csv.py index 275ee2423..a5bfabecd 100644 --- a/examples/from_csv.py +++ b/examples/from_csv.py @@ -1,10 +1,13 @@ """Example of using PandasAI 
with a CSV file.""" -from pandasai import SmartDataframe +from pandasai import Agent from pandasai.llm import OpenAI llm = OpenAI() -df = SmartDataframe("examples/data/Loan payments data.csv", config={"llm": llm}) +df = Agent( + ["examples/data/Loan payments data.csv"], + config={"llm": llm, "enable_cache": False, "max_retries": 1}, +) response = df.chat("How many loans are from men and have been paid off?") print(response) # Output: 247 loans have been paid off by men. diff --git a/examples/from_databricks.py b/examples/from_databricks.py index da3619726..190c03839 100644 --- a/examples/from_databricks.py +++ b/examples/from_databricks.py @@ -1,7 +1,9 @@ """Example of using PandasAI with a DataBricks""" -from pandasai import SmartDataframe -from pandasai.connectors import DatabricksConnector +from pandasai import Agent + +# A license might be required for using Databricks with PandasAI +from pandasai.ee.connectors import DatabricksConnector from pandasai.llm import OpenAI databricks_connector = DatabricksConnector( @@ -21,7 +23,7 @@ ) llm = OpenAI("OPEN_API_KEY") -df = SmartDataframe(databricks_connector, config={"llm": llm}) +df = Agent([databricks_connector], config={"llm": llm}) response = df.chat("How many people from the United states?") print(response) diff --git a/examples/from_dataframe.py b/examples/from_dataframe.py index 216bcc31f..fdaf727b6 100644 --- a/examples/from_dataframe.py +++ b/examples/from_dataframe.py @@ -3,13 +3,13 @@ import pandas as pd from data.sample_dataframe import dataframe -from pandasai import SmartDataframe +from pandasai import Agent from pandasai.llm import OpenAI df = pd.DataFrame(dataframe) llm = OpenAI() -df = SmartDataframe(df=pd.DataFrame(dataframe), config={"llm": llm}) +df = Agent([pd.DataFrame(dataframe)], config={"llm": llm}) response = df.chat("Calculate the sum of the gdp of north american countries") print(response) # Output: 20901884461056 diff --git a/examples/from_excel.py b/examples/from_excel.py index 
7ed269e66..7b7f4b579 100644 --- a/examples/from_excel.py +++ b/examples/from_excel.py @@ -1,11 +1,11 @@ """Example of using PandasAI with am Excel file.""" -from pandasai import SmartDataframe +from pandasai import Agent from pandasai.llm import OpenAI llm = OpenAI() -df = SmartDataframe("examples/data/Loan payments data.xlsx", config={"llm": llm}) +df = Agent(["examples/data/Loan payments data.xlsx"], config={"llm": llm}) response = df.chat("How many loans are from men and have been paid off?") print(response) # Output: 247 loans have been paid off by men. diff --git a/examples/from_google_sheets.py b/examples/from_google_sheets.py index 22921bbe4..fda33b89e 100644 --- a/examples/from_google_sheets.py +++ b/examples/from_google_sheets.py @@ -1,13 +1,13 @@ """Example of using PandasAI with am Excel file.""" -from pandasai import SmartDataframe +from pandasai import Agent from pandasai.llm import OpenAI # Betas & Bludgers Writing Competitions List (source: https://heystacks.com/?type=sheets&tags=data) google_sheets_url = "https://docs.google.com/spreadsheets/d/1VKkhugv2eF87AoOm4OXjI0sQEHrNhxy6gPL3F7xyw7g/edit#gid=115719017" # noqa E501 llm = OpenAI() -df = SmartDataframe(google_sheets_url, config={"llm": llm}) +df = Agent([google_sheets_url], config={"llm": llm}) response = df.chat("How many short stories are there?") print(response) # Output: 35 diff --git a/examples/from_googlebigquery.py b/examples/from_googlebigquery.py new file mode 100644 index 000000000..74b0f41f8 --- /dev/null +++ b/examples/from_googlebigquery.py @@ -0,0 +1,26 @@ +from pandasai import SmartDataframe + +# A license might be required for using Google BigQuery with PandasAI +from pandasai.ee.connectors import GoogleBigQueryConnector +from pandasai.llm import OpenAI + +# ENV's +# BIG_QUERY_DATABASE +# KEYFILE_PATH +# PROJECT_ID + +bigquery_connectors = GoogleBigQueryConnector( +    config={ +        "credentials_path": "credentials.json", +        "database": "loan_payments", +        "table": "loan_payments", + 
"projectID": "project_id", + "where": [["Gender", "=", "female"]], + } +) + +llm = OpenAI("OPEN_AI_KEY") +df = SmartDataframe(bigquery_connectors, config={"llm": llm}) + +response = df.chat("How many rows are there in data ?") +print(response) diff --git a/examples/from_parquet.py b/examples/from_parquet.py index 89f9aeb7b..966a1c147 100644 --- a/examples/from_parquet.py +++ b/examples/from_parquet.py @@ -1,13 +1,11 @@ """Example of using PandasAI with a Parquet file.""" -from pandasai import SmartDataframe +from pandasai import Agent from pandasai.llm import OpenAI llm = OpenAI() -smart_df_read = SmartDataframe( - df="examples/data/Loan payments data.parquet", config={"llm": llm} -) -response = smart_df_read.chat("How many loans are from men and have been paid off?") +agent = Agent(["examples/data/Loan payments data.parquet"], config={"llm": llm}) +response = agent.chat("How many loans are from men and have been paid off?") print(response) # Output: 247 loans have been paid off by men. 
diff --git a/examples/from_snowflake.py b/examples/from_snowflake.py index 4553e076f..ad38aa157 100644 --- a/examples/from_snowflake.py +++ b/examples/from_snowflake.py @@ -1,7 +1,9 @@ """Example of using PandasAI with a Snowflake""" -from pandasai import SmartDataframe -from pandasai.connectors import SnowFlakeConnector +from pandasai import Agent + +# A license might be required for using Snowflake with PandasAI +from pandasai.ee.connectors import SnowFlakeConnector from pandasai.llm import OpenAI snowflake_connector = SnowFlakeConnector( @@ -22,7 +24,7 @@ ) llm = OpenAI(api_token="OPEN_API_KEY") -df = SmartDataframe(snowflake_connector, config={"llm": llm}) +df = Agent([snowflake_connector], config={"llm": llm}) response = df.chat("How many records has status 'F'?") print(response) diff --git a/examples/from_sql.py b/examples/from_sql.py index 341dbe302..3a9bbae64 100644 --- a/examples/from_sql.py +++ b/examples/from_sql.py @@ -1,6 +1,6 @@ """Example of using PandasAI with a CSV file.""" -from pandasai import SmartDatalake +from pandasai import Agent from pandasai.connectors import MySQLConnector, PostgreSQLConnector, SqliteConnector from pandasai.llm import OpenAI @@ -48,9 +48,7 @@ } ) llm = OpenAI() -df = SmartDatalake( - [loan_connector, payment_connector, invoice_connector], config={"llm": llm} -) +df = Agent([loan_connector, payment_connector, invoice_connector], config={"llm": llm}) response = df.chat("How many people from the United states?") print(response) # Output: 247 loans have been paid off by men. 
diff --git a/examples/from_yahoo_finance.py b/examples/from_yahoo_finance.py index 966ddcf39..53ecfcb58 100644 --- a/examples/from_yahoo_finance.py +++ b/examples/from_yahoo_finance.py @@ -1,11 +1,11 @@ -from pandasai import SmartDataframe +from pandasai import Agent from pandasai.connectors.yahoo_finance import YahooFinanceConnector from pandasai.llm import OpenAI yahoo_connector = YahooFinanceConnector("MSFT") llm = OpenAI(api_token="OPEN_API_KEY") -df = SmartDataframe(yahoo_connector, config={"llm": llm}) +df = Agent([yahoo_connector], config={"llm": llm}) response = df.chat("What is the closing price for yesterday?") print(response) diff --git a/examples/save_chart.py b/examples/save_chart.py index ccb7e8614..c246c0d20 100644 --- a/examples/save_chart.py +++ b/examples/save_chart.py @@ -5,7 +5,7 @@ import pandas as pd from data.sample_dataframe import dataframe -from pandasai import SmartDataframe +from pandasai import Agent from pandasai.helpers import path from pandasai.llm import OpenAI @@ -19,8 +19,8 @@ user_defined_path = os.getcwd() user_defined_path = os.path.join(user_defined_path, "exports", "charts") -df = SmartDataframe( - df, +df = Agent( + [df], config={ "llm": llm, "save_charts_path": user_defined_path, diff --git a/examples/show_chart.py b/examples/show_chart.py index 306f2de0c..ed4e5b17a 100644 --- a/examples/show_chart.py +++ b/examples/show_chart.py @@ -3,13 +3,13 @@ import pandas as pd from data.sample_dataframe import dataframe -from pandasai import SmartDataframe +from pandasai import Agent from pandasai.llm import OpenAI df = pd.DataFrame(dataframe) llm = OpenAI() -df = SmartDataframe(df, config={"llm": llm, "verbose": True}) +df = Agent([df], config={"llm": llm, "verbose": True}) response = df.chat( "Plot the histogram of countries showing for each the gpd," " using different colors for each bar", diff --git a/examples/sql_direct_config.py b/examples/sql_direct_config.py index 8ec2128da..9b14431da 100644 --- 
a/examples/sql_direct_config.py +++ b/examples/sql_direct_config.py @@ -1,9 +1,7 @@ """Example of using PandasAI with a CSV file.""" - -from pandasai import SmartDatalake +from pandasai import Agent from pandasai.connectors import PostgreSQLConnector from pandasai.llm import OpenAI -from pandasai.smart_dataframe import SmartDataframe # With a PostgreSQL database order = PostgreSQLConnector( @@ -39,20 +37,13 @@ } ) - llm = OpenAI("OPEN_API_KEY") -order_details_smart_df = SmartDataframe( - order_details, +agent = Agent( + [order, products, order_details], config={"llm": llm, "direct_sql": True}, - description="Contain user order details", ) - -df = SmartDatalake( - [order_details_smart_df, order, products], - config={"llm": llm, "direct_sql": True}, -) -response = df.chat("return orders with count of distinct products") +response = agent.chat("return orders with count of distinct products") print(response) diff --git a/examples/using_pandasai_log_server.py b/examples/using_pandasai_log_server.py index 1432ca199..e62fb827e 100644 --- a/examples/using_pandasai_log_server.py +++ b/examples/using_pandasai_log_server.py @@ -20,8 +20,8 @@ salaries_df = pd.DataFrame(salaries_data) # Example 1: Using Environment Variables -os.environ["LOGGING_SERVER_URL"] = "SERVER_URL" -os.environ["LOGGING_SERVER_API_KEY"] = "YOUR_API_KEY" +os.environ["PANDASAI_API_URL"] = "SERVER_URL" +os.environ["PANDASAI_API_KEY"] = "YOUR_API_KEY" llm = OpenAI("YOUR_API_KEY") diff --git a/examples/using_streamlit.py b/examples/using_streamlit.py index c6b6b9871..891226e0d 100644 --- a/examples/using_streamlit.py +++ b/examples/using_streamlit.py @@ -6,7 +6,7 @@ """ import pandas as pd -from pandasai import SmartDatalake +from pandasai import Agent from pandasai.llm import OpenAI from pandasai.responses.streamlit_response import StreamlitResponse @@ -26,7 +26,7 @@ ) llm = OpenAI() -dl = SmartDatalake( +dl = Agent( [employees_df, salaries_df], config={"llm": llm, "verbose": True, "response_parser": 
StreamlitResponse}, ) diff --git a/examples/with_azure.py b/examples/with_azure.py index 9c67ac83a..0da001fa6 100644 --- a/examples/with_azure.py +++ b/examples/with_azure.py @@ -3,7 +3,7 @@ import pandas as pd from data.sample_dataframe import dataframe -from pandasai import SmartDataframe +from pandasai import Agent from pandasai.llm import AzureOpenAI df = pd.DataFrame(dataframe) @@ -22,7 +22,7 @@ # is_chat_model=False, # Comment in if you deployed a completion model ) -df = SmartDataframe(df, config={"llm": llm}) +df = Agent([df], config={"llm": llm}) response = df.chat("Calculate the sum of the gdp of north american countries") print(response) # Output: 20901884461056 diff --git a/examples/with_multiple_dataframes.py b/examples/with_multiple_dataframes.py index 2aab78859..9676e1647 100644 --- a/examples/with_multiple_dataframes.py +++ b/examples/with_multiple_dataframes.py @@ -2,7 +2,7 @@ import pandas as pd -from pandasai import SmartDatalake +from pandasai import Agent from pandasai.llm import OpenAI employees_df = pd.DataFrame( @@ -21,7 +21,7 @@ ) llm = OpenAI() -dl = SmartDatalake( +dl = Agent( [employees_df, salaries_df], config={"llm": llm, "verbose": True}, ) diff --git a/examples/with_name_and_description.py b/examples/with_name_and_description.py index aa4bf201f..60022ad55 100644 --- a/examples/with_name_and_description.py +++ b/examples/with_name_and_description.py @@ -3,14 +3,14 @@ import pandas as pd from data.sample_dataframe import dataframe -from pandasai import SmartDataframe +from pandasai import Agent from pandasai.llm import OpenAI df = pd.DataFrame(dataframe) llm = OpenAI() -df = SmartDataframe( - df=pd.DataFrame(dataframe), +df = Agent( + [pd.DataFrame(dataframe)], name="Countries", description="A dataframe with countries with their GDPs and happiness scores", config={"llm": llm}, diff --git a/examples/with_privacy_enforced.py b/examples/with_privacy_enforced.py index b89638f4e..663289a50 100644 --- a/examples/with_privacy_enforced.py +++ 
b/examples/with_privacy_enforced.py @@ -2,15 +2,13 @@ import pandas as pd -from pandasai import SmartDataframe +from pandasai import Agent from pandasai.llm import OpenAI from .data.sample_dataframe import dataframe llm = OpenAI() -df = SmartDataframe( - df=pd.DataFrame(dataframe), config={"llm": llm, "enforce_privacy": True} -) +df = Agent([pd.DataFrame(dataframe)], config={"llm": llm, "enforce_privacy": True}) response = df.chat("Calculate the sum of the gdp of north american countries") print(response) # Output: 20901884461056 diff --git a/examples/with_vertexai.py b/examples/with_vertexai.py index 4d150ad75..0b6de5d47 100644 --- a/examples/with_vertexai.py +++ b/examples/with_vertexai.py @@ -4,7 +4,7 @@ import pandas as pd -from pandasai import SmartDataframe +from pandasai import Agent from pandasai.llm import GoogleVertexAI df = pd.read_csv("examples/data/Loan payments data.csv") @@ -15,7 +15,7 @@ llm = GoogleVertexAI( project_id="generative-ai-training", location="us-central1", model="text-bison@001" ) -df = SmartDataframe(df, config={"llm": llm}) +df = Agent([df], config={"llm": llm}) response = df.chat("How many loans are from men and have been paid off?") print(response) # Output: 247 loans have been paid off by men. 
diff --git a/images/logo.png b/images/logo.png new file mode 100644 index 000000000..2443bb13e Binary files /dev/null and b/images/logo.png differ diff --git a/images/pandas-ai.png b/images/pandas-ai.png deleted file mode 100644 index fb6033bba..000000000 Binary files a/images/pandas-ai.png and /dev/null differ diff --git a/mkdocs.yml b/mkdocs.yml index c26f88a95..0b29b143f 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -19,30 +19,21 @@ nav: - Pipelines: - pipelines/pipelines.md - Advanced usage: - - skills.md - - shortcuts.md - - custom-head.md - - save-dataframes.md - - determinism.md - cache.md + - custom-head.md + - fields-description.md + - train.md - custom-response.md - - custom-instructions.md - - custom-prompts.md - custom-whitelisted-dependencies.md + - skills.md + - determinism.md - Examples: - examples.md - - API: - - API/pandasai.md - - API/prompts.md - - API/llms.md - - API/helpers.md - About: - - Release Notes: release-notes.md - Contributing: CONTRIBUTING.md - - Documents Building: building_docs.md - License: license.md extra: - version: "1.5.21" + version: "2.0" plugins: - search - mkdocstrings: diff --git a/pandasai.json b/pandasai.json index d995c259d..7d091d4d1 100644 --- a/pandasai.json +++ b/pandasai.json @@ -5,12 +5,10 @@ "enable_cache": true, "use_error_correction_framework": true, "max_retries": 3, - "custom_prompts": {}, "open_charts": true, "save_charts": false, "save_charts_path": "exports/charts", "custom_whitelisted_dependencies": [], "llm": "OpenAI", - "llm_options": null, - "saved_dfs": [] + "llm_options": null } diff --git a/pandasai/__init__.py b/pandasai/__init__.py index 3fba900bd..28a8f62ee 100644 --- a/pandasai/__init__.py +++ b/pandasai/__init__.py @@ -4,12 +4,13 @@ """ import importlib.metadata +from pandasai.smart_dataframe import SmartDataframe +from pandasai.smart_datalake import SmartDatalake + from .agent import Agent from .engine import set_pd_engine from .helpers.cache import Cache from .skills import skill -from 
.smart_dataframe import SmartDataframe -from .smart_datalake import SmartDatalake __version__ = importlib.metadata.version(__package__ or __name__) @@ -21,11 +22,11 @@ def clear_cache(filename: str = None): __all__ = [ - "SmartDataframe", - "SmartDatalake", "Agent", "clear_cache", "skill", "set_pd_engine", "pandas", + "SmartDataframe", + "SmartDatalake", ] diff --git a/pandasai/agent/__init__.py b/pandasai/agent/__init__.py index 3250036d5..dcfc8b8d9 100644 --- a/pandasai/agent/__init__.py +++ b/pandasai/agent/__init__.py @@ -1,209 +1,3 @@ -import json -from typing import List, Optional, Union +from .base import Agent -from ..helpers.df_info import DataFrameType -from ..helpers.logger import Logger -from ..helpers.memory import Memory -from ..prompts.base import AbstractPrompt -from ..prompts.check_if_relevant_to_conversation import ( - CheckIfRelevantToConversationPrompt, -) -from ..prompts.clarification_questions_prompt import ClarificationQuestionPrompt -from ..prompts.explain_prompt import ExplainPrompt -from ..prompts.rephase_query_prompt import RephraseQueryPrompt -from ..schemas.df_config import Config -from ..skills import Skill -from ..smart_datalake import SmartDatalake - - -class Agent: - """ - Agent class to improve the conversational experience in PandasAI - """ - - _lake: SmartDatalake = None - _logger: Optional[Logger] = None - - def __init__( - self, - dfs: Union[DataFrameType, List[DataFrameType]], - config: Optional[Union[Config, dict]] = None, - logger: Optional[Logger] = None, - memory_size: int = 10, - ): - """ - Args: - df (Union[DataFrameType, List[DataFrameType]]): DataFrame can be Pandas, - Polars or Database connectors - memory_size (int, optional): Conversation history to use during chat. - Defaults to 1. 
- """ - - if not isinstance(dfs, list): - dfs = [dfs] - - self._lake = SmartDatalake(dfs, config, logger, memory=Memory(memory_size)) - - # set instance type in SmartDataLake - self._lake.set_instance_type(self.__class__.__name__) - - self._logger = self._lake.logger - - def add_skills(self, *skills: Skill): - """ - Add Skills to PandasAI - """ - self._lake.add_skills(*skills) - - def _call_llm_with_prompt(self, prompt: AbstractPrompt): - """ - Call LLM with prompt using error handling to retry based on config - Args: - prompt (AbstractPrompt): AbstractPrompt to pass to LLM's - """ - retry_count = 0 - while retry_count < self._lake.config.max_retries: - try: - result: str = self._lake.llm.call(prompt) - if prompt.validate(result): - return result - else: - raise Exception("Response validation failed!") - except Exception: - if ( - not self._lake.use_error_correction_framework - or retry_count >= self._lake.config.max_retries - 1 - ): - raise - retry_count += 1 - - def chat(self, query: str, output_type: Optional[str] = None): - """ - Simulate a chat interaction with the assistant on Dataframe. - """ - try: - is_related = self.check_if_related_to_conversation(query) - self._lake.is_related_query(is_related) - return self._lake.chat(query, output_type=output_type) - except Exception as exception: - return ( - "Unfortunately, I was not able to get your answers, " - "because of the following error:\n" - f"\n{exception}\n" - ) - - def add_message(self, message, is_user=False): - """ - Add message to the memory. This is useful when you want to add a message - to the memory without calling the chat function (for example, when you - need to add a message from the agent). 
- """ - self._lake._memory.add(message, is_user=is_user) - - def check_if_related_to_conversation(self, query: str) -> bool: - """ - Check if the query is related to the previous conversation - """ - if self._lake._memory.count() == 0: - return - - prompt = CheckIfRelevantToConversationPrompt( - conversation=self._lake._memory.get_conversation(), - query=query, - ) - - result = self._call_llm_with_prompt(prompt) - - related = "true" in result - self._logger.log( - f"""Check if the new message is related to the conversation: {related}""" - ) - - if not related: - self._lake.clear_memory() - - return related - - def clarification_questions(self, query: str) -> List[str]: - """ - Generate clarification questions based on the data - """ - prompt = ClarificationQuestionPrompt( - dataframes=self._lake.dfs, - conversation=self._lake._memory.get_conversation(), - query=query, - ) - - result = self._call_llm_with_prompt(prompt) - self._logger.log( - f"""Clarification Questions: {result} - """ - ) - result = result.replace("```json", "").replace("```", "") - questions: list[str] = json.loads(result) - return questions[:3] - - def start_new_conversation(self): - """ - Clears the previous conversation - """ - self._lake.clear_memory() - - def explain(self) -> str: - """ - Returns the explanation of the code how it reached to the solution - """ - try: - prompt = ExplainPrompt( - conversation=self._lake._memory.get_conversation(), - code=self._lake.last_code_executed, - ) - response = self._call_llm_with_prompt(prompt) - self._logger.log( - f"""Explanation: {response} - """ - ) - return response - except Exception as exception: - return ( - "Unfortunately, I was not able to explain, " - "because of the following error:\n" - f"\n{exception}\n" - ) - - def rephrase_query(self, query: str): - try: - prompt = RephraseQueryPrompt( - query=query, - dataframes=self._lake.dfs, - conversation=self._lake._memory.get_conversation(), - ) - - response = self._call_llm_with_prompt(prompt) - 
self._logger.log( - f"""Rephrased Response: {response} - """ - ) - return response - except Exception as exception: - return ( - "Unfortunately, I was not able to rephrase query, " - "because of the following error:\n" - f"\n{exception}\n" - ) - - @property - def last_code_generated(self): - return self._lake.last_code_generated - - @property - def last_code_executed(self): - return self._lake.last_code_executed - - @property - def last_prompt(self): - return self._lake.last_prompt - - @property - def last_query_log_id(self): - return self._lake.last_query_log_id +__all__ = ["Agent"] diff --git a/pandasai/agent/base.py b/pandasai/agent/base.py new file mode 100644 index 000000000..c429a2e85 --- /dev/null +++ b/pandasai/agent/base.py @@ -0,0 +1,428 @@ +import json +import os +import uuid +from typing import List, Optional, Type, Union + +import pandas as pd + +from pandasai.pipelines.chat.chat_pipeline_input import ( + ChatPipelineInput, +) +from pandasai.vectorstores.vectorstore import VectorStore + +from ..config import load_config_from_json +from ..connectors import BaseConnector, PandasConnector +from ..constants import DEFAULT_CACHE_DIRECTORY, DEFAULT_CHART_DIRECTORY +from ..exceptions import InvalidLLMOutputType, MissingVectorStoreError +from ..helpers.df_info import df_type +from ..helpers.folder import Folder +from ..helpers.logger import Logger +from ..helpers.memory import Memory +from ..llm.base import LLM +from ..llm.langchain import LangchainLLM +from ..pipelines.chat.generate_chat_pipeline import ( + GenerateChatPipeline, +) +from ..pipelines.pipeline_context import PipelineContext +from ..prompts.base import BasePrompt +from ..prompts.check_if_relevant_to_conversation import ( + CheckIfRelevantToConversationPrompt, +) +from ..prompts.clarification_questions_prompt import ClarificationQuestionPrompt +from ..prompts.explain_prompt import ExplainPrompt +from ..prompts.rephase_query_prompt import RephraseQueryPrompt +from ..schemas.df_config import Config 
+from ..skills import Skill +from .callbacks import Callbacks + + +class Agent: + """ + Agent class to improve the conversational experience in PandasAI + """ + + def __init__( + self, + dfs: Union[ + pd.DataFrame, BaseConnector, List[Union[pd.DataFrame, BaseConnector]] + ], + config: Optional[Union[Config, dict]] = None, + memory_size: Optional[int] = 10, + pipeline: Optional[Type[GenerateChatPipeline]] = None, + vectorstore: Optional[VectorStore] = None, + description: str = None, + ): + """ + Args: + df (Union[pd.DataFrame, List[pd.DataFrame]]): Pandas dataframe + Polars or Database connectors + memory_size (int, optional): Conversation history to use during chat. + Defaults to 1. + """ + self.last_prompt = None + self.last_prompt_id = None + self.last_result = None + self.last_code_generated = None + self.last_code_executed = None + self.agent_info = description + + self.conversation_id = uuid.uuid4() + + dfs = self.get_dfs(dfs) + + # Instantiate the context + config = self.get_config(config) + self.context = PipelineContext( + dfs=dfs, + config=config, + memory=Memory(memory_size, agent_info=description), + vectorstore=vectorstore, + ) + + # Instantiate the logger + self.logger = Logger(save_logs=config.save_logs, verbose=config.verbose) + + # Instantiate the vectorstore + self._vectorstore = vectorstore + + if self._vectorstore is None and os.environ.get("PANDASAI_API_KEY"): + try: + from pandasai.vectorstores.bamboo_vectorstore import BambooVectorStore + except ImportError as e: + raise ImportError( + "Could not import BambooVectorStore. Please install the required dependencies." 
+ ) from e + + self._vectorstore = BambooVectorStore(logger=self.logger) + self.context.vectorstore = self._vectorstore + + callbacks = Callbacks(self) + + self.chat_pipeline = ( + pipeline( + self.context, + self.logger, + on_prompt_generation=callbacks.on_prompt_generation, + on_code_generation=callbacks.on_code_generation, + on_code_execution=callbacks.on_code_execution, + on_result=callbacks.on_result, + ) + if pipeline + else GenerateChatPipeline( + self.context, + self.logger, + on_prompt_generation=callbacks.on_prompt_generation, + on_code_generation=callbacks.on_code_generation, + on_code_execution=callbacks.on_code_execution, + on_result=callbacks.on_result, + ) + ) + + self.configure() + + def configure(self): + config = self.context.config + + # Add project root path if save_charts_path is default + if config.save_charts and config.save_charts_path == DEFAULT_CHART_DIRECTORY: + Folder.create(config.save_charts_path) + + # Add project root path if cache_path is default + if config.enable_cache: + Folder.create(DEFAULT_CACHE_DIRECTORY) + + def get_config(self, config: Union[Config, dict]): + """ + Load a config to be used to run the queries. + + Args: + config (Union[Config, dict]): Config to be used + """ + + config = load_config_from_json(config) + + if isinstance(config, dict) and config.get("llm") is None: + config["llm"] = self.get_llm(config["llm"]) + + config = Config(**config) + + return config + + def get_llm(self, llm: LLM) -> LLM: + """ + Load a LLM to be used to run the queries. + Check if it is a PandasAI LLM or a Langchain LLM. + If it is a Langchain LLM, wrap it in a PandasAI LLM. 
+ + Args: + llm (object): LLMs option to be used for API access + + Raises: + BadImportError: If the LLM is a Langchain LLM but the langchain package + is not installed + """ + if LangchainLLM.is_langchain_llm(llm): + llm = LangchainLLM(llm) + + return llm + + def get_dfs( + self, + dfs: Union[ + pd.DataFrame, BaseConnector, List[Union[pd.DataFrame, BaseConnector]] + ], + ): + """ + Load all the dataframes to be used in the agent. + + Args: + dfs (List[Union[pd.DataFrame, Any]]): Pandas dataframe + """ + # Inline import to avoid circular import + from pandasai.smart_dataframe import SmartDataframe + + # If only one dataframe is passed, convert it to a list + if not isinstance(dfs, list): + dfs = [dfs] + + connectors = [] + for df in dfs: + if isinstance(df, BaseConnector): + connectors.append(df) + elif isinstance(df, (pd.DataFrame, pd.Series, list, dict, str)): + connectors.append(PandasConnector({"original_df": df})) + elif df_type(df) == "modin": + connectors.append(PandasConnector({"original_df": df})) + elif isinstance(df, SmartDataframe) and isinstance( + df.dataframe, BaseConnector + ): + connectors.append(df.dataframe) + else: + try: + import polars as pl + + if isinstance(df, pl.DataFrame): + from ..connectors.polars import PolarsConnector + + connectors.append(PolarsConnector({"original_df": df})) + + else: + raise ValueError( + "Invalid input data. We cannot convert it to a dataframe." + ) + except ImportError as e: + raise ValueError( + "Invalid input data. We cannot convert it to a dataframe." 
+ ) from e + return connectors + + def add_skills(self, *skills: Skill): + """ + Add Skills to PandasAI + """ + self.context.skills_manager.add_skills(*skills) + + def call_llm_with_prompt(self, prompt: BasePrompt): + """ + Call LLM with prompt using error handling to retry based on config + Args: + prompt (BasePrompt): BasePrompt to pass to LLM's + """ + retry_count = 0 + while retry_count < self.context.config.max_retries: + try: + result: str = self.context.config.llm.call(prompt) + if prompt.validate(result): + return result + else: + raise InvalidLLMOutputType("Response validation failed!") + except Exception: + if ( + not self.context.config.use_error_correction_framework + or retry_count >= self.context.config.max_retries - 1 + ): + raise + retry_count += 1 + + def chat(self, query: str, output_type: Optional[str] = None): + """ + Simulate a chat interaction with the assistant on Dataframe. + """ + try: + self.logger.log(f"Question: {query}") + self.logger.log( + f"Running PandasAI with {self.context.config.llm.type} LLM..." + ) + + self.assign_prompt_id() + + pipeline_input = ChatPipelineInput( + query, output_type, self.conversation_id, self.last_prompt_id + ) + + return self.chat_pipeline.run(pipeline_input) + except Exception as exception: + return ( + "Unfortunately, I was not able to get your answers, " + "because of the following error:\n" + f"\n{exception}\n" + ) + + def train( + self, + queries: Optional[List[str]] = None, + codes: Optional[List[str]] = None, + docs: Optional[List[str]] = None, + ) -> None: + """ + Trains the context to be passed to model + Args: + queries (Optional[str], optional): user user + codes (Optional[str], optional): generated code + docs (Optional[List[str]], optional): additional docs + Raises: + ImportError: if default vector db lib is not installed it raises an error + """ + if self._vectorstore is None: + raise MissingVectorStoreError( + "No vector store provided. Please provide a vector store to train the agent." 
+ ) + + if (queries is not None and codes is None) or ( + queries is None and codes is not None + ): + raise ValueError( + "If either queries or codes are provided, both must be provided." + ) + + if docs is not None: + self._vectorstore.add_docs(docs) + + if queries and codes: + self._vectorstore.add_question_answer(queries, codes) + + self.logger.log("Agent successfully trained on the data") + + def clear_memory(self): + """ + Clears the memory + """ + self.context.memory.clear() + self.conversation_id = uuid.uuid4() + + def add_message(self, message, is_user=False): + """ + Add message to the memory. This is useful when you want to add a message + to the memory without calling the chat function (for example, when you + need to add a message from the agent). + """ + self.context.memory.add(message, is_user=is_user) + + def assign_prompt_id(self): + """Assign a prompt ID""" + + self.last_prompt_id = uuid.uuid4() + + if self.logger: + self.logger.log(f"Prompt ID: {self.last_prompt_id}") + + def check_if_related_to_conversation(self, query: str) -> bool: + """ + Check if the query is related to the previous conversation + """ + if self.context.memory.count() == 0: + return + + prompt = CheckIfRelevantToConversationPrompt( + context=self.context, + query=query, + ) + + result = self.call_llm_with_prompt(prompt) + + is_related = "true" in result + self.logger.log( + f"""Check if the new message is related to the conversation: {is_related}""" + ) + + if not is_related: + self.clear_memory() + + return is_related + + def clarification_questions(self, query: str) -> List[str]: + """ + Generate clarification questions based on the data + """ + prompt = ClarificationQuestionPrompt( + context=self.context, + query=query, + ) + + result = self.call_llm_with_prompt(prompt) + self.logger.log( + f"""Clarification Questions: {result} + """ + ) + result = result.replace("```json", "").replace("```", "") + questions: list[str] = json.loads(result) + return questions[:3] + + def 
start_new_conversation(self): + """ + Clears the previous conversation + """ + self.clear_memory() + + def explain(self) -> str: + """ + Returns the explanation of the code how it reached to the solution + """ + try: + prompt = ExplainPrompt( + context=self.context, + code=self.last_code_executed, + ) + response = self.call_llm_with_prompt(prompt) + self.logger.log( + f"""Explanation: {response} + """ + ) + return response + except Exception as exception: + return ( + "Unfortunately, I was not able to explain, " + "because of the following error:\n" + f"\n{exception}\n" + ) + + def rephrase_query(self, query: str): + try: + prompt = RephraseQueryPrompt( + context=self.context, + query=query, + ) + response = self.call_llm_with_prompt(prompt) + self.logger.log( + f"""Rephrased Response: {response} + """ + ) + return response + except Exception as exception: + return ( + "Unfortunately, I was not able to rephrase query, " + "because of the following error:\n" + f"\n{exception}\n" + ) + + @property + def logs(self): + return self.logger.logs + + @property + def last_error(self): + return self.chat_pipeline.last_error + + @property + def last_query_log_id(self): + return self.chat_pipeline.get_last_track_log_id() diff --git a/pandasai/agent/callbacks.py b/pandasai/agent/callbacks.py new file mode 100644 index 000000000..1b4b5db6c --- /dev/null +++ b/pandasai/agent/callbacks.py @@ -0,0 +1,42 @@ +from ..prompts import BasePrompt + + +class Callbacks: + def __init__(self, agent): + self.agent = agent + + def on_prompt_generation(self, prompt: BasePrompt) -> str: + """ + A method to be called after prompt generation. + + Args: + prompt (str): A prompt + """ + self.agent.last_prompt = str(prompt) + + def on_code_generation(self, code: str): + """ + A method to be called after code generation. + + Args: + code (str): A python code + """ + self.agent.last_code_generated = code + + def on_code_execution(self, code: str): + """ + A method to be called after code execution. 
+ + Args: + code (str): A python code + """ + self.agent.last_code_executed = code + + def on_result(self, result): + """ + A method to be called after code execution. + + Args: + result (Any): A python code + """ + self.agent.last_result = result diff --git a/pandasai/assets/prompt_templates/correct_error_prompt.tmpl b/pandasai/assets/prompt_templates/correct_error_prompt.tmpl deleted file mode 100644 index 6ab1c4249..000000000 --- a/pandasai/assets/prompt_templates/correct_error_prompt.tmpl +++ /dev/null @@ -1,12 +0,0 @@ -{dataframes} - -The user asked the following question: -{conversation} - -You generated this python code: -{code} - -It fails with the following error: -{error_returned} - -Fix the python code above and return the new python code: \ No newline at end of file diff --git a/pandasai/assets/prompt_templates/correct_output_type_error_prompt.tmpl b/pandasai/assets/prompt_templates/correct_output_type_error_prompt.tmpl deleted file mode 100644 index de9a97459..000000000 --- a/pandasai/assets/prompt_templates/correct_output_type_error_prompt.tmpl +++ /dev/null @@ -1,9 +0,0 @@ -{dataframes} - -The user asked the following question: -{conversation} - -You generated this python code: -{code} - -Fix the python code above and return the new python code but the result type should be: {output_type_hint} \ No newline at end of file diff --git a/pandasai/assets/prompt_templates/current_code.tmpl b/pandasai/assets/prompt_templates/current_code.tmpl deleted file mode 100644 index 6ac77c5f5..000000000 --- a/pandasai/assets/prompt_templates/current_code.tmpl +++ /dev/null @@ -1,6 +0,0 @@ -# TODO: import the required dependencies -{default_import} - -# Write code here - -# Declare result var: {output_type_hint} \ No newline at end of file diff --git a/pandasai/assets/prompt_templates/direct_sql_connector.tmpl b/pandasai/assets/prompt_templates/direct_sql_connector.tmpl deleted file mode 100644 index 57885a302..000000000 --- 
a/pandasai/assets/prompt_templates/direct_sql_connector.tmpl +++ /dev/null @@ -1,20 +0,0 @@ - -{tables} - - -You can call the following functions that have been pre-defined for you: - -def execute_sql_query(sql_query: str) -> pd.Dataframe - """This method connects to the database, executes the sql query and returns the dataframe""" - -{skills} - -{prev_conversation} - -{code_description} -```python -{current_code} -``` - -{last_message} -{reasoning} \ No newline at end of file diff --git a/pandasai/assets/prompt_templates/generate_python_code.tmpl b/pandasai/assets/prompt_templates/generate_python_code.tmpl deleted file mode 100644 index ba531b57b..000000000 --- a/pandasai/assets/prompt_templates/generate_python_code.tmpl +++ /dev/null @@ -1,13 +0,0 @@ -{dataframes} -{skills} - -{prev_conversation} - -{code_description} -```python -{current_code} -``` - -{last_message} -Variable `dfs: list[pd.DataFrame]` is already declared. -{reasoning} \ No newline at end of file diff --git a/pandasai/assets/prompt_templates/generate_synthetic_data.tmpl b/pandasai/assets/prompt_templates/generate_synthetic_data.tmpl deleted file mode 100644 index 0d5fa7c05..000000000 --- a/pandasai/assets/prompt_templates/generate_synthetic_data.tmpl +++ /dev/null @@ -1,7 +0,0 @@ -Please extend the head above and generate {amount} examples of this dataframe where each datapoint -is unique, try to use statistical terms like mean, mode, median to generate numerical data -and wide range of text inputs for textual data use faker. - -{dataframe} - -Use pd.DataFrame.from_records to construct dataframe and return the code that generates the dataframe in df variable. 
\ No newline at end of file diff --git a/pandasai/assets/prompt_templates/rephrase_query_prompt.tmpl b/pandasai/assets/prompt_templates/rephrase_query_prompt.tmpl deleted file mode 100644 index ce9217d8b..000000000 --- a/pandasai/assets/prompt_templates/rephrase_query_prompt.tmpl +++ /dev/null @@ -1,17 +0,0 @@ - -You are provided with the following pandas DataFrames: - -{dataframes} -{conversation} - -Use the provided dataframe and conversation we have had to Return the rephrased -sentence of "{query}” in order to obtain more accurate and comprehensive responses -without any explanations. -""" - - conversation_text: str = """ -And based on our conversation: - - -{conversation} - \ No newline at end of file diff --git a/pandasai/assets/prompt_templates/simple_reasoning.tmpl b/pandasai/assets/prompt_templates/simple_reasoning.tmpl deleted file mode 100644 index e01c8e9f4..000000000 --- a/pandasai/assets/prompt_templates/simple_reasoning.tmpl +++ /dev/null @@ -1,6 +0,0 @@ - -At the end, declare "result" variable as a dictionary of type and value. -{viz_library_type} -{instructions} - -Generate python code and return full updated code: \ No newline at end of file diff --git a/pandasai/assets/prompt_templates/viz_library.tmpl b/pandasai/assets/prompt_templates/viz_library.tmpl deleted file mode 100644 index c01306e4c..000000000 --- a/pandasai/assets/prompt_templates/viz_library.tmpl +++ /dev/null @@ -1 +0,0 @@ -If you are asked to plot a chart, use "{library}" for charts, save as png. 
\ No newline at end of file diff --git a/pandasai/config.py b/pandasai/config.py index b0f915a0e..2282161c8 100644 --- a/pandasai/config.py +++ b/pandasai/config.py @@ -6,7 +6,7 @@ from .schemas.df_config import Config -def load_config( +def load_config_from_json( override_config: Optional[Union[Config, dict]] = None, ): """ diff --git a/pandasai/connectors/__init__.py b/pandasai/connectors/__init__.py index 10019105f..816fe5f4b 100644 --- a/pandasai/connectors/__init__.py +++ b/pandasai/connectors/__init__.py @@ -6,8 +6,8 @@ from .airtable import AirtableConnector from .base import BaseConnector -from .databricks import DatabricksConnector -from .snowflake import SnowFlakeConnector +from .pandas import PandasConnector +from .polars import PolarsConnector from .sql import MySQLConnector, PostgreSQLConnector, SQLConnector, SqliteConnector from .yahoo_finance import YahooFinanceConnector @@ -17,8 +17,8 @@ "MySQLConnector", "PostgreSQLConnector", "YahooFinanceConnector", - "SnowFlakeConnector", - "DatabricksConnector", "AirtableConnector", "SqliteConnector", + "PandasConnector", + "PolarsConnector", ] diff --git a/pandasai/connectors/airtable.py b/pandasai/connectors/airtable.py index 76173b452..4a1057733 100644 --- a/pandasai/connectors/airtable.py +++ b/pandasai/connectors/airtable.py @@ -14,7 +14,17 @@ from ..exceptions import InvalidRequestError from ..helpers.path import find_project_root -from .base import AirtableConnectorConfig, BaseConnector, BaseConnectorConfig +from .base import BaseConnector, BaseConnectorConfig + + +class AirtableConnectorConfig(BaseConnectorConfig): + """ + Connecter configuration for Airtable data. 
+ """ + + api_key: str + base_id: str + database: str = "airtable_data" class AirtableConnector(BaseConnector): @@ -30,6 +40,7 @@ def __init__( self, config: Optional[Union[AirtableConnectorConfig, dict]] = None, cache_interval: int = 600, + **kwargs, ): if isinstance(config, dict): if "token" in config and "base_id" in config and "table" in config: @@ -52,7 +63,7 @@ def __init__( self._root_url: str = "https://api.airtable.com/v0/" self._cache_interval = cache_interval - super().__init__(config) + super().__init__(config, **kwargs) def _init_connection(self, config: BaseConnectorConfig): """ @@ -88,7 +99,7 @@ def _get_cache_path(self, include_additional_filters: bool = False): cache_dir = os.path.join((find_project_root()), "cache") except ValueError: cache_dir = os.path.join(os.getcwd(), "cache") - return os.path.join(cache_dir, f"{self._config.table}_data.parquet") + return os.path.join(cache_dir, f"{self.config.table}_data.parquet") def _cached(self, include_additional_filters: bool = False): """ @@ -137,14 +148,14 @@ def fallback_name(self): Returns : str : The fallback table name of the connector. """ - return self._config.table + return self.config.table - def execute(self): + def execute(self) -> pd.DataFrame: """ Execute the connector and return the result. Returns: - DataFrameType: The result of the connector. + pd.DataFrame: The result of the connector. 
""" if cached := self._cached() or self._cached(include_additional_filters=True): return pd.read_parquet(cached) @@ -162,17 +173,17 @@ def _build_formula(self): """ condition_strings = [] - if self._config.where is not None: - for i in self._config.where: + if self.config.where is not None: + for i in self.config.where: filter_query = f"{i[0]}{i[1]}'{i[2]}'" condition_strings.append(filter_query) return f'AND({",".join(condition_strings)})' def _request_api(self, params): - url = f"{self._root_url}{self._config.base_id}/{self._config.table}" + url = f"{self._root_url}{self.config.base_id}/{self.config.table}" return requests.get( url=url, - headers={"Authorization": f"Bearer {self._config.token}"}, + headers={"Authorization": f"Bearer {self.config.api_key}"}, params=params, ) @@ -183,7 +194,7 @@ def _fetch_data(self): params = {"pageSize": 100, "offset": "0"} - if self._config.where is not None: + if self.config.where is not None: params["filterByFormula"] = self._build_formula() data = [] @@ -209,7 +220,7 @@ def _fetch_data(self): return pd.DataFrame(data) @cache - def head(self): + def head(self, n: int = 5) -> pd.DataFrame: """ Return the head of the table that the connector is connected to. @@ -218,7 +229,7 @@ def head(self): DatFrameType: The head of the data source that the connector is connected to . """ - data = self._request_api(params={"maxRecords": 5}) + data = self._request_api(params={"maxRecords": n}) return pd.DataFrame( [ {"id": record["id"], **record["fields"]} diff --git a/pandasai/connectors/base.py b/pandasai/connectors/base.py index 3b8626021..91244c4f1 100644 --- a/pandasai/connectors/base.py +++ b/pandasai/connectors/base.py @@ -2,13 +2,19 @@ Base connector class to be extended by all connectors. 
""" +import json import os from abc import ABC, abstractmethod -from typing import Optional, Union - +from functools import cache +from typing import Union + +import pandasai.pandas as pd +from pandasai.helpers.dataframe_serializer import ( + DataframeSerializer, + DataframeSerializerType, +) from pandasai.pydantic import BaseModel -from ..helpers.df_info import DataFrameType from ..helpers.logger import Logger @@ -22,90 +28,22 @@ class BaseConnectorConfig(BaseModel): where: list[list[str]] = None -class AirtableConnectorConfig(BaseConnectorConfig): - """ - Connecter configuration for Airtable data. - """ - - token: str - base_id: str - database: str = "airtable_data" - - -class SQLBaseConnectorConfig(BaseConnectorConfig): - """ - Base Connector configuration. - """ - - driver: Optional[str] = None - dialect: Optional[str] = None - - -class SqliteConnectorConfig(SQLBaseConnectorConfig): - """ - Connector configurations for sqlite db. - """ - - table: str - database: str - - -class YahooFinanceConnectorConfig(BaseConnectorConfig): - """ - Connector configuration for Yahoo Finance. - """ - - dialect: str = "yahoo_finance" - host: str = "yahoo.finance.com" - database: str = "stock_data" - host: str - - -class SQLConnectorConfig(SQLBaseConnectorConfig): - """ - Connector configuration. - """ - - host: str - port: int - username: str - password: str - - -class SnowFlakeConnectorConfig(SQLBaseConnectorConfig): - """ - Connector configuration for SnowFlake. - """ - - account: str - database: str - username: str - password: str - dbSchema: str - warehouse: str - - -class DatabricksConnectorConfig(SQLBaseConnectorConfig): - """ - Connector configuration for DataBricks. - """ - - host: str - port: int - token: str - httpPath: str - - class BaseConnector(ABC): """ Base connector class to be extended by all connectors. 
""" - _config: BaseConnectorConfig = None _logger: Logger = None _additional_filters: list[list[str]] = None - def __init__(self, config: Union[BaseConnectorConfig, dict]): + def __init__( + self, + config: Union[BaseConnectorConfig, dict], + name: str = None, + description: str = None, + custom_head: pd.DataFrame = None, + field_descriptions: dict = None, + ): """ Initialize the connector with the given configuration. @@ -115,7 +53,11 @@ def __init__(self, config: Union[BaseConnectorConfig, dict]): if isinstance(config, dict): config = self._load_connector_config(config) - self._config = config + self.config = config + self.name = name + self.description = description + self.custom_head = custom_head + self.field_descriptions = field_descriptions def _load_connector_config(self, config: Union[BaseConnectorConfig, dict]): """Loads passed Configuration to object @@ -155,7 +97,7 @@ def _init_connection(self, config: BaseConnectorConfig): pass @abstractmethod - def head(self): + def head(self, n: int = 3) -> pd.DataFrame: """ Return the head of the data source that the connector is connected to. This information is passed to the LLM to provide the schema of the @@ -164,7 +106,7 @@ def head(self): pass @abstractmethod - def execute(self) -> DataFrameType: + def execute(self) -> pd.DataFrame: """ Execute the given query on the data source that the connector is connected to. @@ -210,10 +152,10 @@ def path(self): Return the path of the data source that the connector is connected to. 
""" # JDBC string - path = f"{self.__class__.__name__}://{self._config.host}:" - if hasattr(self._config, "port"): - path += str(self._config.port) - path += f"/{self._config.database}/{self._config.table}" + path = f"{self.__class__.__name__}://{self.config.host}:" + if hasattr(self.config, "port"): + path += str(self.config.port) + path += f"/{self.config.database}/{self.config.table}" return path @property @@ -239,3 +181,107 @@ def fallback_name(self): Return the name of the table that the connector is connected to. """ raise NotImplementedError + + @property + def pandas_df(self): + """ + Returns the pandas dataframe + """ + raise NotImplementedError + + def equals(self, other): + return self.__dict__ == other.__dict__ + + @cache + def get_head(self, n: int = 3) -> pd.DataFrame: + """ + Return the head of the data source that the connector is connected to. + This information is passed to the LLM to provide the schema of the + data source. + + Args: + n (int, optional): The number of rows to return. Defaults to 5. + + Returns: + pd.DataFrame: The head of the data source that the connector is + connected to. + """ + return self.custom_head if self.custom_head is not None else self.head(n) + + def head_with_truncate_columns(self, max_size=25) -> pd.DataFrame: + """ + Truncate the columns of the dataframe to a maximum of 20 characters. + + Args: + df (pd.DataFrame): The dataframe to truncate the columns of. + + Returns: + pd.DataFrame: The dataframe with truncated columns. + """ + df_trunc = self.get_head().copy() + + for col in df_trunc.columns: + if df_trunc[col].dtype == "object": + first_val = df_trunc[col].iloc[0] + if isinstance(first_val, str) and len(first_val) > max_size: + df_trunc[col] = f"{df_trunc[col].str.slice(0, max_size - 3)}..." + + return df_trunc + + @cache + def get_schema(self) -> pd.DataFrame: + """ + A sample of the dataframe. + + Returns: + pd.DataFrame: A sample of the dataframe. 
+ """ + if self.get_head() is None: + return None + + if len(self.get_head()) > 0: + return self.head_with_truncate_columns() + + return self.get_head() + + @cache + def to_csv(self) -> str: + """ + A proxy-call to the dataframe's `.to_csv()`. + + Returns: + str: The dataframe as a CSV string. + """ + return self.get_head().to_csv(index=False) + + @cache + def to_string( + self, + index: int = 0, + is_direct_sql: bool = False, + serializer: DataframeSerializerType = None, + ) -> str: + """ + Convert dataframe to string + Returns: + str: dataframe string + """ + return DataframeSerializer().serialize( + self, + extras={ + "index": index, + "type": "sql" if is_direct_sql else "pd.DataFrame", + "is_direct_sql": is_direct_sql, + }, + type_=serializer, + ) + + @cache + def to_json(self): + df_head = self.get_head() + + return { + "name": self.name, + "description": self.description, + "head": json.loads(df_head.to_json(orient="records", date_format="iso")), + } diff --git a/pandasai/connectors/pandas.py b/pandasai/connectors/pandas.py new file mode 100644 index 000000000..9179c389f --- /dev/null +++ b/pandasai/connectors/pandas.py @@ -0,0 +1,154 @@ +""" +Pandas connector class to handle csv, parquet, xlsx files and pandas dataframes. +""" + +import hashlib +from functools import cache, cached_property +from typing import Union + +from pydantic import BaseModel + +import pandasai.pandas as pd + +from ..helpers.data_sampler import DataSampler +from ..helpers.file_importer import FileImporter +from ..helpers.logger import Logger +from .base import BaseConnector + + +class PandasConnectorConfig(BaseModel): + """ + Pandas Connector configuration. + """ + + original_df: Union[pd.DataFrame, pd.Series, str, list, dict] + + class Config: + arbitrary_types_allowed = True + + +class PandasConnector(BaseConnector): + """ + Pandas connector class to handle csv, parquet, xlsx files and pandas dataframes. 
+ """ + + pandas_df = pd.DataFrame + _logger: Logger = None + _additional_filters: list[list[str]] = None + + def __init__( + self, + config: Union[PandasConnectorConfig, dict], + **kwargs, + ): + """ + Initialize the Pandas connector with the given configuration. + + Args: + config (PandasConnectorConfig): The configuration for the Pandas connector. + """ + super().__init__(config, **kwargs) + + self._load_df(self.config.original_df) + + def _load_df(self, df: Union[pd.DataFrame, pd.Series, str, list, dict]): + """ + Load the dataframe from a file or pandas dataframe. + + Args: + df (Union[pd.DataFrame, pd.Series, str, list, dict]): The dataframe to load. + """ + if isinstance(df, pd.Series): + self.pandas_df = df.to_frame() + elif isinstance(df, pd.DataFrame): + self.pandas_df = df + elif isinstance(df, (list, dict)): + try: + self.pandas_df = pd.DataFrame(df) + except Exception as e: + raise ValueError( + "Invalid input data. We cannot convert it to a dataframe." + ) from e + elif isinstance(df, str): + self.pandas_df = FileImporter.import_from_file(df) + else: + raise ValueError("Invalid input data. We cannot convert it to a dataframe.") + + def _load_connector_config( + self, config: Union[PandasConnectorConfig, dict] + ) -> PandasConnectorConfig: + """ + Loads passed Configuration to object + + Args: + config (PandasConnectorConfig): Construct config in structure + + Returns: + config: PandasConnectorConfig + """ + return PandasConnectorConfig(**config) + + @cache + def head(self, n: int = 5) -> pd.DataFrame: + """ + Return the head of the data source that the connector is connected to. + This information is passed to the LLM to provide the schema of the + data source. + """ + sampler = DataSampler(self.pandas_df) + return sampler.sample(n) + + @cache + def execute(self) -> pd.DataFrame: + """ + Execute the given query on the data source that the connector is + connected to. 
+ """ + return self.pandas_df + + @cached_property + def rows_count(self): + """ + Return the number of rows in the data source that the connector is + connected to. + """ + return len(self.pandas_df) + + @cached_property + def columns_count(self): + """ + Return the number of columns in the data source that the connector is + connected to. + """ + return len(self.pandas_df.columns) + + @property + def column_hash(self): + """ + Return the hash code that is unique to the columns of the data source + that the connector is connected to. + """ + columns_str = "".join(self.pandas_df.columns) + hash_object = hashlib.sha256(columns_str.encode()) + return hash_object.hexdigest() + + @cached_property + def path(self): + """ + Return the path of the data source that the connector is connected to. + """ + pass + + @property + def fallback_name(self): + """ + Return the name of the table that the connector is connected to. + """ + pass + + def equals(self, other: BaseConnector): + """ + Return whether the data source that the connector is connected to is + equal to the other data source. + """ + return self._original_df.equals(other._original_df) diff --git a/pandasai/connectors/polars.py b/pandasai/connectors/polars.py new file mode 100644 index 000000000..75f10ecba --- /dev/null +++ b/pandasai/connectors/polars.py @@ -0,0 +1,149 @@ +""" +Polars connector class to handle csv, parquet, xlsx files and polars dataframes. +""" + +import hashlib +from functools import cache, cached_property +from typing import Union + +import polars as pl +from pydantic import BaseModel + +from ..helpers.data_sampler import DataSampler +from ..helpers.file_importer import FileImporter +from ..helpers.logger import Logger +from .base import BaseConnector + + +class PolarsConnectorConfig(BaseModel): + """ + Polars Connector configuration. 
+ """ + + original_df: pl.DataFrame + + class Config: + arbitrary_types_allowed = True + + +class PolarsConnector(BaseConnector): + """ + Polars connector class to handle csv, parquet, xlsx files and polars dataframes. + """ + + pandas_df = pl.DataFrame + _logger: Logger = None + _additional_filters: list[list[str]] = None + + def __init__( + self, + config: Union[PolarsConnectorConfig, dict], + **kwargs, + ): + """ + Initialize the Polars connector with the given configuration. + + Args: + config (PolarsConnectorConfig): The configuration for the Polars connector. + """ + super().__init__(config, **kwargs) + + self._load_df(self.config.original_df) + + def _load_df(self, df: Union[pl.DataFrame, pl.Series, str, dict]): + """ + Load the dataframe from a file or polars dataframe. + + Args: + df (Union[pl.DataFrame, pl.Series, str, dict]): The dataframe to load. + """ + polars_df = None + if isinstance(df, pl.Series): + polars_df = df.to_frame() + elif isinstance(df, pl.DataFrame): + polars_df = df + elif isinstance(df, str): + polars_df = FileImporter.import_from_file(df) + elif isinstance(df, dict): + try: + polars_df = pl.DataFrame(df) + except Exception as e: + raise ValueError( + "Invalid input data. We cannot convert it to a dataframe." + ) from e + else: + raise ValueError("Invalid input data. We cannot convert it to a dataframe.") + + self.pandas_df = polars_df.to_pandas() + + def _load_connector_config( + self, config: Union[PolarsConnectorConfig, dict] + ) -> PolarsConnectorConfig: + """ + Loads passed Configuration to object + + Args: + config (PolarsConnectorConfig): Construct config in structure + + Returns: + config: PolarsConnectorConfig + """ + return PolarsConnectorConfig(**config) + + @cache + def head(self, n: int = 5) -> pl.DataFrame: + """ + Return the head of the data source that the connector is connected to. + This information is passed to the LLM to provide the schema of the + data source. 
+ """ + sampler = DataSampler(self.pandas_df) + return sampler.sample(n) + + @cache + def execute(self) -> pl.DataFrame: + """ + Execute the given query on the data source that the connector is + connected to. + """ + return self.pandas_df + + @cached_property + def rows_count(self): + """ + Return the number of rows in the data source that the connector is + connected to. + """ + return len(self.pandas_df) + + @cached_property + def columns_count(self): + """ + Return the number of columns in the data source that the connector is + connected to. + """ + return len(self.pandas_df.columns) + + @property + def column_hash(self): + """ + Return the hash code that is unique to the columns of the data source + that the connector is connected to. + """ + columns_str = "".join(self.pandas_df.columns) + hash_object = hashlib.sha256(columns_str.encode()) + return hash_object.hexdigest() + + @cached_property + def path(self): + """ + Return the path of the data source that the connector is connected to. + """ + pass + + @property + def fallback_name(self): + """ + Return the name of the table that the connector is connected to. 
+ """ + pass diff --git a/pandasai/connectors/sql.py b/pandasai/connectors/sql.py index 6572af78d..7cc0e8d45 100644 --- a/pandasai/connectors/sql.py +++ b/pandasai/connectors/sql.py @@ -7,22 +7,46 @@ import re import time from functools import cache, cached_property -from typing import Union +from typing import Optional, Union from sqlalchemy import asc, create_engine, select, text from sqlalchemy.engine import Connection import pandasai.pandas as pd from pandasai.exceptions import MaliciousQueryError +from pandasai.helpers.path import find_project_root from ..constants import DEFAULT_FILE_PERMISSIONS -from ..helpers.path import find_project_root -from .base import ( - BaseConnector, - BaseConnectorConfig, - SQLConnectorConfig, - SqliteConnectorConfig, -) +from .base import BaseConnector, BaseConnectorConfig + + +class SQLBaseConnectorConfig(BaseConnectorConfig): + """ + Base Connector configuration. + """ + + driver: Optional[str] = None + dialect: Optional[str] = None + + +class SqliteConnectorConfig(SQLBaseConnectorConfig): + """ + Connector configurations for sqlite db. + """ + + table: str + database: str + + +class SQLConnectorConfig(SQLBaseConnectorConfig): + """ + Connector configuration. + """ + + host: str + port: int + username: str + password: str class SQLConnector(BaseConnector): @@ -37,7 +61,10 @@ class SQLConnector(BaseConnector): _cache_interval: int = 600 # 10 minutes def __init__( - self, config: Union[BaseConnectorConfig, dict], cache_interval: int = 600 + self, + config: Union[BaseConnectorConfig, dict], + cache_interval: int = 600, + **kwargs, ): """ Initialize the SQL connector with the given configuration. @@ -46,7 +73,7 @@ def __init__( config (ConnectorConfig): The configuration for the SQL connector. 
""" config = self._load_connector_config(config) - super().__init__(config) + super().__init__(config, **kwargs) if config.dialect is None: raise Exception("SQL dialect must be specified") @@ -55,6 +82,9 @@ def __init__( self._cache_interval = cache_interval + # Table to equal to table name for sql connectors + self.name = self.fallback_name + def _load_connector_config(self, config: Union[BaseConnectorConfig, dict]): """ Loads passed Configuration to object @@ -103,10 +133,10 @@ def __repr__(self): str: The string representation of the SQL connector. """ return ( - f"<{self.__class__.__name__} dialect={self._config.dialect} " - f"driver={self._config.driver} host={self._config.host} " - f"port={str(self._config.port)} database={self._config.database} " - f"table={self._config.table}>" + f"<{self.__class__.__name__} dialect={self.config.dialect} " + f"driver={self.config.driver} host={self.config.host} " + f"port={str(self.config.port)} database={self.config.database} " + f"table={self.config.table}>" ) def _validate_column_name(self, column_name): @@ -115,12 +145,12 @@ def _validate_column_name(self, column_name): raise ValueError(f"Invalid column name: {column_name}") def _build_query(self, limit=None, order=None): - base_query = select("*").select_from(text(self._config.table)) - if self._config.where or self._additional_filters: + base_query = select("*").select_from(text(self.config.table)) + if self.config.where or self._additional_filters: # conditions is the list of where + additional filters conditions = [] - if self._config.where: - conditions += self._config.where + if self.config.where: + conditions += self.config.where if self._additional_filters: conditions += self._additional_filters @@ -153,7 +183,7 @@ def _build_query(self, limit=None, order=None): return base_query @cache - def head(self): + def head(self, n: int = 5) -> pd.DataFrame: """ Return the head of the data source that the connector is connected to. 
This information is passed to the LLM to provide the schema of the data source. @@ -164,12 +194,12 @@ def head(self): if self.logger: self.logger.log( - f"Getting head of {self._config.table} " - f"using dialect {self._config.dialect}" + f"Getting head of {self.config.table} " + f"using dialect {self.config.dialect}" ) # Run a SQL query to get all the columns names and 5 random rows - query = self._build_query(limit=5, order="RAND()") + query = self._build_query(limit=n, order="RAND()") # Return the head of the data source return pd.read_sql(query, self._connection) @@ -260,8 +290,8 @@ def execute(self): if self.logger: self.logger.log( - f"Loading the table {self._config.table} " - f"using dialect {self._config.dialect}" + f"Loading the table {self.config.table} " + f"using dialect {self.config.dialect}" ) # Run a SQL query to get all the results @@ -291,12 +321,12 @@ def rows_count(self): if self.logger: self.logger.log( "Getting the number of rows in the table " - f"{self._config.table} using dialect " - f"{self._config.dialect}" + f"{self.config.table} using dialect " + f"{self.config.dialect}" ) # Run a SQL query to get the number of rows - query = select(text("COUNT(*)")).select_from(text(self._config.table)) + query = select(text("COUNT(*)")).select_from(text(self.config.table)) # Return the number of rows self._rows_count = self._connection.execute(query).fetchone()[0] @@ -317,8 +347,8 @@ def columns_count(self): if self.logger: self.logger.log( "Getting the number of columns in the table " - f"{self._config.table} using dialect " - f"{self._config.dialect}" + f"{self.config.table} using dialect " + f"{self.config.dialect}" ) self._columns_count = len(self.head().columns) @@ -339,14 +369,14 @@ def _get_column_hash(self, include_additional_filters: bool = False): # Return the hash of the columns and the where clause columns_str = "".join(self.head().columns) if ( - self._config.where + self.config.where or include_additional_filters and 
self._additional_filters is not None ): columns_str += "WHERE" - if self._config.where: + if self.config.where: # where clause is a list of lists - for condition in self._config.where: + for condition in self.config.where: columns_str += f"{condition[0]} {condition[1]} {condition[2]}" if include_additional_filters and self._additional_filters: for condition in self._additional_filters: @@ -367,24 +397,24 @@ def column_hash(self): @property def fallback_name(self): - return self._config.table + return self.config.table + + @property + def pandas_df(self): + return self.execute() def equals(self, other): if isinstance(other, self.__class__): return ( - self._config.dialect, - self._config.driver, - self._config.host, - self._config.port, - self._config.username, - self._config.password, + self.config.dialect, + self.config.driver, + self.config.host, + self.config.port, ) == ( - other._config.dialect, - other._config.driver, - other._config.host, - other._config.port, - other._config.username, - other._config.password, + other.config.dialect, + other.config.driver, + other.config.host, + other.config.port, ) return False @@ -415,7 +445,11 @@ class SqliteConnector(SQLConnector): Sqlite connector are used to connect to Sqlite databases. """ - def __init__(self, config: Union[SqliteConnectorConfig, dict]): + def __init__( + self, + config: Union[SqliteConnectorConfig, dict], + **kwargs, + ): """ Initialize the Sqlite connector with the given configuration. 
@@ -427,7 +461,7 @@ def __init__(self, config: Union[SqliteConnectorConfig, dict]): sqlite_env_vars = {"database": "SQLITE_DB_PATH", "table": "TABLENAME"} config = self._populate_config_from_env(config, sqlite_env_vars) - super().__init__(config) + super().__init__(config, **kwargs) def _load_connector_config(self, config: Union[BaseConnectorConfig, dict]): """ @@ -459,7 +493,7 @@ def __del__(self): self._connection.close() @cache - def head(self): + def head(self, n: int = 5) -> pd.DataFrame: """ Return the head of the data source that the connector is connected to. This information is passed to the LLM to provide the schema of the data source. @@ -470,12 +504,12 @@ def head(self): if self.logger: self.logger.log( - f"Getting head of {self._config.table} " - f"using dialect {self._config.dialect}" + f"Getting head of {self.config.table} " + f"using dialect {self.config.dialect}" ) # Run a SQL query to get all the columns names and 5 random rows - query = self._build_query(limit=5, order="RANDOM()") + query = self._build_query(limit=n, order="RANDOM()") # Return the head of the data source return pd.read_sql(query, self._connection) @@ -488,9 +522,9 @@ def __repr__(self): str: The string representation of the SQL connector. """ return ( - f"<{self.__class__.__name__} dialect={self._config.dialect} " - f"database={self._config.database} " - f"table={self._config.table}>" + f"<{self.__class__.__name__} dialect={self.config.dialect} " + f"database={self.config.database} " + f"table={self.config.table}>" ) @@ -499,7 +533,11 @@ class MySQLConnector(SQLConnector): MySQL connectors are used to connect to MySQL databases. """ - def __init__(self, config: Union[SQLConnectorConfig, dict]): + def __init__( + self, + config: Union[SQLConnectorConfig, dict], + **kwargs, + ): """ Initialize the MySQL connector with the given configuration. 
@@ -519,7 +557,7 @@ def __init__(self, config: Union[SQLConnectorConfig, dict]): } config = self._populate_config_from_env(config, mysql_env_vars) - super().__init__(config) + super().__init__(config, **kwargs) class PostgreSQLConnector(SQLConnector): @@ -527,7 +565,11 @@ class PostgreSQLConnector(SQLConnector): PostgreSQL connectors are used to connect to PostgreSQL databases. """ - def __init__(self, config: Union[SQLConnectorConfig, dict]): + def __init__( + self, + config: Union[SQLConnectorConfig, dict], + **kwargs, + ): """ Initialize the PostgreSQL connector with the given configuration. @@ -547,10 +589,10 @@ def __init__(self, config: Union[SQLConnectorConfig, dict]): } config = self._populate_config_from_env(config, postgresql_env_vars) - super().__init__(config) + super().__init__(config, **kwargs) @cache - def head(self): + def head(self, n: int = 5) -> pd.DataFrame: """ Return the head of the data source that the connector is connected to. This information is passed to the LLM to provide the schema of the data source. 
@@ -561,12 +603,12 @@ def head(self): if self.logger: self.logger.log( - f"Getting head of {self._config.table} " - f"using dialect {self._config.dialect}" + f"Getting head of {self.config.table} " + f"using dialect {self.config.dialect}" ) # Run a SQL query to get all the columns names and 5 random rows - query = self._build_query(limit=5, order="RANDOM()") + query = self._build_query(limit=n, order="RANDOM()") # Return the head of the data source return pd.read_sql(query, self._connection) diff --git a/pandasai/connectors/yahoo_finance.py b/pandasai/connectors/yahoo_finance.py index d714331e1..4e11ba0cc 100644 --- a/pandasai/connectors/yahoo_finance.py +++ b/pandasai/connectors/yahoo_finance.py @@ -7,7 +7,18 @@ from ..constants import DEFAULT_FILE_PERMISSIONS from ..helpers.path import find_project_root -from .base import BaseConnector, YahooFinanceConnectorConfig +from .base import BaseConnector, BaseConnectorConfig + + +class YahooFinanceConnectorConfig(BaseConnectorConfig): + """ + Connector configuration for Yahoo Finance. + """ + + dialect: str = "yahoo_finance" + host: str = "yahoo.finance.com" + database: str = "stock_data" + host: str class YahooFinanceConnector(BaseConnector): @@ -22,6 +33,7 @@ def __init__( stock_ticker: Optional[str] = None, config: Optional[Union[YahooFinanceConnectorConfig, dict]] = None, cache_interval: int = 600, + **kwargs, ): if not stock_ticker and not config: raise ValueError( @@ -49,17 +61,17 @@ def __init__( self._cache_interval = cache_interval super().__init__(yahoo_finance_config) - self.ticker = yfinance.Ticker(self._config.table) + self.ticker = yfinance.Ticker(self.config.table) - def head(self): + def head(self, n: int = 5) -> pd.DataFrame: """ Return the head of the data source that the connector is connected to. Returns: - DataFrameType: The head of the data source that the connector is + DataFrame: The head of the data source that the connector is connected to. connected to. 
""" - return self.ticker.history(period="5d") + return self.ticker.history(period=f"{n}d") def _get_cache_path(self, include_additional_filters: bool = False): """ @@ -74,7 +86,7 @@ def _get_cache_path(self, include_additional_filters: bool = False): except ValueError: cache_dir = os.path.join(os.getcwd(), "cache") - return os.path.join(cache_dir, f"{self._config.table}_data.parquet") + return os.path.join(cache_dir, f"{self.config.table}_data.parquet") def _get_cache_path(self): """ @@ -87,7 +99,7 @@ def _get_cache_path(self): os.makedirs(cache_dir, mode=DEFAULT_FILE_PERMISSIONS, exist_ok=True) - return os.path.join(cache_dir, f"{self._config.table}_data.parquet") + return os.path.join(cache_dir, f"{self.config.table}_data.parquet") def _cached(self): """ @@ -119,7 +131,7 @@ def execute(self): Execute the connector and return the result. Returns: - DataFrameType: The result of the connector. + DataFrame: The result of the connector. """ if cached_path := self._cached(): return pd.read_parquet(cached_path) @@ -180,4 +192,8 @@ def fallback_name(self): Returns: str: The fallback name of the connector. 
""" - return self._config.table + return self.config.table + + @property + def pandas_df(self): + return self.execute() diff --git a/pandasai/constants.py b/pandasai/constants.py index 393921a74..79bcafae1 100644 --- a/pandasai/constants.py +++ b/pandasai/constants.py @@ -8,6 +8,9 @@ # Default directory to store chart if user doesn't provide any DEFAULT_CHART_DIRECTORY = "exports/charts" +# Default directory for cache +DEFAULT_CACHE_DIRECTORY = "cache" + # Default permissions for files and directories DEFAULT_FILE_PERMISSIONS = 0o755 diff --git a/pandasai/ee/LICENSE b/pandasai/ee/LICENSE new file mode 100644 index 000000000..86060d530 --- /dev/null +++ b/pandasai/ee/LICENSE @@ -0,0 +1,36 @@ +The PandasAI Enterprise license (the “Enterprise License”) +Copyright (c) 2024 Sinaptik GmbH + +With regard to the PandasAI Software: + +This software and associated documentation files (the "Software") may only be +used in production, if you (and any entity that you represent) have agreed to, +and are in compliance with, the PandasAI Subscription Terms of Service, available +at https://pandas-ai.com/terms (the “Enterprise Terms”), or other +agreement governing the use of the Software, as agreed by you and PandasAI, +and otherwise have a valid PandasAI Enterprise license for the +correct number of user seats. Subject to the foregoing sentence, you are free to +modify this Software and publish patches to the Software. You agree that PandasAI +and/or its licensors (as applicable) retain all right, title and interest in and +to all such modifications and/or patches, and all such modifications and/or +patches may only be used, copied, modified, displayed, distributed, or otherwise +exploited with a valid PandasAI Enterprise license for the correct +number of user seats. Notwithstanding the foregoing, you may copy and modify +the Software for development and testing purposes, without requiring a +subscription. 
You agree that PandasAI and/or its licensors (as applicable) retain +all right, title and interest in and to all such modifications. You are not +granted any other rights beyond what is expressly stated herein. Subject to the +foregoing, it is forbidden to copy, merge, publish, distribute, sublicense, +and/or sell the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +For all third party components incorporated into the PandasAI Software, those +components are licensed under the original license provided by the owner of the +applicable component. diff --git a/pandasai/ee/connectors/__init__.py b/pandasai/ee/connectors/__init__.py new file mode 100644 index 000000000..d0fc8520c --- /dev/null +++ b/pandasai/ee/connectors/__init__.py @@ -0,0 +1,15 @@ +""" +Connectors are used to connect to databases, external APIs, and other data sources. + +The connectors package contains all the connectors that are used by the application. 
+""" + +from .databricks import DatabricksConnector +from .google_big_query import GoogleBigQueryConnector +from .snowflake import SnowFlakeConnector + +__all__ = [ + "SnowFlakeConnector", + "DatabricksConnector", + "GoogleBigQueryConnector", +] diff --git a/pandasai/connectors/databricks.py b/pandasai/ee/connectors/databricks.py similarity index 65% rename from pandasai/connectors/databricks.py rename to pandasai/ee/connectors/databricks.py index 2365cdce3..89e449b93 100644 --- a/pandasai/connectors/databricks.py +++ b/pandasai/ee/connectors/databricks.py @@ -7,8 +7,19 @@ from sqlalchemy import create_engine -from .base import BaseConnectorConfig, DatabricksConnectorConfig -from .sql import SQLConnector +from ...connectors.base import BaseConnectorConfig +from ...connectors.sql import SQLBaseConnectorConfig, SQLConnector + + +class DatabricksConnectorConfig(SQLBaseConnectorConfig): + """ + Connector configuration for DataBricks. + """ + + host: str + port: int + token: str + httpPath: str class DatabricksConnector(SQLConnector): @@ -16,7 +27,11 @@ class DatabricksConnector(SQLConnector): Databricks connectors are used to connect to Databricks Data Cloud. """ - def __init__(self, config: Union[DatabricksConnectorConfig, dict]): + def __init__( + self, + config: Union[DatabricksConnectorConfig, dict], + **kwargs, + ): """ Initialize the Databricks connector with the given configuration. @@ -34,7 +49,7 @@ def __init__(self, config: Union[DatabricksConnectorConfig, dict]): } config = self._populate_config_from_env(config, env_vars) - super().__init__(config) + super().__init__(config, **kwargs) def _load_connector_config(self, config: Union[BaseConnectorConfig, dict]): return DatabricksConnectorConfig(**config) @@ -61,24 +76,22 @@ def __repr__(self): str: The string representation of the Databricks connector. 
""" return ( - f"<{self.__class__.__name__} dialect={self._config.dialect} " - f"host={self._config.host} port={self._config.port} " - f"database={self._config.database} httpPath={str(self._config.httpPath)}" + f"<{self.__class__.__name__} dialect={self.config.dialect} " + f"host={self.config.host} port={self.config.port} " + f"database={self.config.database} httpPath={str(self.config.httpPath)}" ) def equals(self, other): if isinstance(other, self.__class__): return ( - self._config.dialect, - self._config.token, - self._config.host, - self._config.port, - self._config.httpPath, + self.config.dialect, + self.config.host, + self.config.port, + self.config.httpPath, ) == ( - other._config.dialect, - other._config.token, - other._config.host, - other._config.port, - other._config.httpPath, + other.config.dialect, + other.config.host, + other.config.port, + other.config.httpPath, ) return False diff --git a/pandasai/ee/connectors/google_big_query.py b/pandasai/ee/connectors/google_big_query.py new file mode 100644 index 000000000..21708b02e --- /dev/null +++ b/pandasai/ee/connectors/google_big_query.py @@ -0,0 +1,77 @@ +""" +Google Big Query connector is used to connect to dataset from +google big query api. +""" + +from typing import Union + +from sqlalchemy import create_engine + +from ...connectors.base import BaseConnectorConfig +from ...connectors.sql import SQLBaseConnectorConfig, SQLConnector + + +class GoogleBigQueryConnectorConfig(SQLBaseConnectorConfig): + """ + Connector configuration for big query. + """ + + credentials_path: str + database: str + table: str + projectID: str + + +class GoogleBigQueryConnector(SQLConnector): + """ + GoogleBigQuery Connectors are used to connect to BigQuery Data Cloud. + """ + + def __init__(self, config: Union[GoogleBigQueryConnectorConfig, dict]): + """ + Initialize the GoogleBigQuery connector with the given configuration. + + Args: + config (ConnectorConfig): The config for the GoogleBigQuery connector. 
+ """ + config["dialect"] = "bigquery" + if isinstance(config, dict): + env_vars = { + "database": "BIG_QUERY_DATABASE", + "credentials_path": "KEYFILE_PATH", + "projectID": "PROJECT_ID", + } + config = self._populate_config_from_env(config, env_vars) + + super().__init__(config) + + def _load_connector_config(self, config: Union[BaseConnectorConfig, dict]): + return GoogleBigQueryConnectorConfig(**config) + + def _init_connection(self, config: GoogleBigQueryConnectorConfig): + """ + Initialize Database Connection + + Args: + config (GoogleBigQueryConnectorConfig): Configurations to load database + + """ + + self._engine = create_engine( + f"{config.dialect}://{config.projectID}/{config.database}", + credentials_path=config.credentials_path, + ) + + self._connection = self._engine.connect() + + def __repr__(self): + """ + Return the string representation of the Google big query connector. + + Returns: + str: The string representation of the Google big query connector. + """ + return ( + f"<{self.__class__.__name__} dialect={self.config.dialect} " + f"projectid= {self.config.projectID} database={self.config.database} >" + ) diff --git a/pandasai/connectors/snowflake.py b/pandasai/ee/connectors/snowflake.py similarity index 65% rename from pandasai/connectors/snowflake.py rename to pandasai/ee/connectors/snowflake.py index f2937b014..5ace3cde2 100644 --- a/pandasai/connectors/snowflake.py +++ b/pandasai/ee/connectors/snowflake.py @@ -9,8 +9,21 @@ import pandasai.pandas as pd -from .base import BaseConnectorConfig, SnowFlakeConnectorConfig -from .sql import SQLConnector +from ...connectors.base import BaseConnectorConfig +from ...connectors.sql import SQLBaseConnectorConfig, SQLConnector + + +class SnowFlakeConnectorConfig(SQLBaseConnectorConfig): + """ + Connector configuration for SnowFlake. 
+ """ + + account: str + database: str + username: str + password: str + dbSchema: str + warehouse: str class SnowFlakeConnector(SQLConnector): @@ -18,7 +31,11 @@ class SnowFlakeConnector(SQLConnector): SnowFlake connectors are used to connect to SnowFlake Data Cloud. """ - def __init__(self, config: Union[SnowFlakeConnectorConfig, dict]): + def __init__( + self, + config: Union[SnowFlakeConnectorConfig, dict], + **kwargs, + ): """ Initialize the SnowFlake connector with the given configuration. @@ -38,7 +55,7 @@ def __init__(self, config: Union[SnowFlakeConnectorConfig, dict]): } config = self._populate_config_from_env(config, snowflake_env_vars) - super().__init__(config) + super().__init__(config, **kwargs) def _load_connector_config(self, config: Union[BaseConnectorConfig, dict]): return SnowFlakeConnectorConfig(**config) @@ -58,7 +75,7 @@ def _init_connection(self, config: SnowFlakeConnectorConfig): self._connection = self._engine.connect() @cache - def head(self): + def head(self, n: int = 5) -> pd.DataFrame: """ Return the head of the data source that the connector is connected to. This information is passed to the LLM to provide the schema of the data source. @@ -69,12 +86,12 @@ def head(self): if self.logger: self.logger.log( - f"Getting head of {self._config.table} " - f"using dialect {self._config.dialect}" + f"Getting head of {self.config.table} " + f"using dialect {self.config.dialect}" ) # Run a SQL query to get all the columns names and 5 random rows - query = self._build_query(limit=5, order="RANDOM()") + query = self._build_query(limit=n, order="RANDOM()") # Return the head of the data source return pd.read_sql(query, self._connection) @@ -87,24 +104,24 @@ def __repr__(self): str: The string representation of the SnowFlake connector. 
""" return ( - f"<{self.__class__.__name__} dialect={self._config.dialect} " - f"Account={self._config.account} " - f"warehouse={self._config.warehouse} " - f"database={self._config.database} schema={str(self._config.dbSchema)} " - f"table={self._config.table}>" + f"<{self.__class__.__name__} dialect={self.config.dialect} " + f"Account={self.config.account} " + f"warehouse={self.config.warehouse} " + f"database={self.config.database} schema={str(self.config.dbSchema)} " + f"table={self.config.table}>" ) def equals(self, other): if isinstance(other, self.__class__): return ( - self._config.dialect, - self._config.account, - self._config.username, - self._config.password, + self.config.dialect, + self.config.dbSchema, + self.config.warehouse, + self.config.account, ) == ( - other._config.dialect, - other._config.account, - other._config.username, - other._config.password, + other.config.dialect, + other.config.dbSchema, + other.config.warehouse, + other.config.account, ) return False diff --git a/pandasai/ee/vectorstores/__init__.py b/pandasai/ee/vectorstores/__init__.py new file mode 100644 index 000000000..aa232858d --- /dev/null +++ b/pandasai/ee/vectorstores/__init__.py @@ -0,0 +1,7 @@ +""" +Vector stores to store data for training purpose +""" + +from .chroma import ChromaDB + +__all__ = ["ChromaDB"] diff --git a/pandasai/ee/vectorstores/chroma.py b/pandasai/ee/vectorstores/chroma.py new file mode 100644 index 000000000..de008a176 --- /dev/null +++ b/pandasai/ee/vectorstores/chroma.py @@ -0,0 +1,318 @@ +import os +import uuid +from typing import Callable, Iterable, List, Optional, Union + +import chromadb +from chromadb import config +from chromadb.utils import embedding_functions + +from pandasai.helpers.logger import Logger +from pandasai.helpers.path import find_project_root +from pandasai.vectorstores.vectorstore import VectorStore + +DEFAULT_EMBEDDING_FUNCTION = embedding_functions.DefaultEmbeddingFunction() + + +class ChromaDB(VectorStore): + """ + 
Implementation of ChromeDB vector store + """ + + _logger: Logger + + def __init__( + self, + collection_name: str = "pandasai", + embedding_function: Optional[Callable[[List[str]], List[float]]] = None, + persist_path: Optional[str] = None, + client_settings: Optional[config.Settings] = None, + max_samples: int = 1, + similary_threshold: int = 1.5, + logger: Optional[Logger] = None, + ) -> None: + self._logger = logger or Logger() + self._max_samples = max_samples + self._similarity_threshold = similary_threshold + + # Initialize Chromadb Client + # initialize from client settings if exists + if client_settings: + client_settings.persist_directory = ( + persist_path or client_settings.persist_directory + ) + _client_settings = client_settings + + # use persist path if exists + elif persist_path: + _client_settings = config.Settings( + is_persistent=True, anonymized_telemetry=False + ) + _client_settings.persist_directory = persist_path + # else use root as default path + else: + _client_settings = config.Settings( + is_persistent=True, anonymized_telemetry=False + ) + _client_settings.persist_directory = os.path.join( + find_project_root(), "chromadb" + ) + + self._client_settings = _client_settings + self._client = chromadb.Client(_client_settings) + self._persist_directory = _client_settings.persist_directory + + self._logger.log(f"Persisting Agent Training data in {self._persist_directory}") + + self._embedding_function = embedding_function or DEFAULT_EMBEDDING_FUNCTION + + self._qa_collection = self._client.get_or_create_collection( + name=f"{collection_name}-qa", embedding_function=self._embedding_function + ) + + self._docs_collection = self._client.get_or_create_collection( + name=f"{collection_name}-docs", embedding_function=self._embedding_function + ) + + self._logger.log(f"Successfully initialized collection {collection_name}") + + def add_question_answer( + self, + queries: Iterable[str], + codes: Iterable[str], + ids: Optional[Iterable[str]] = None, + 
metadatas: Optional[List[dict]] = None, + ) -> List[str]: + """ + Add question and answer(code) to the training set + Args: + query: string of question + code: str + ids: Optional Iterable of ids associated with the texts. + metadatas: Optional list of metadatas associated with the texts. + kwargs: vectorstore specific parameters + Returns: + List of ids from adding the texts into the vectorstore. + """ + if len(queries) != len(codes): + raise ValueError( + f"Queries and codes dimension doesn't match {len(queries)} != {len(codes)}" + ) + + if ids is None: + ids = [f"{str(uuid.uuid4())}-qa" for _ in queries] + qa_str = [self._format_qa(query, code) for query, code in zip(queries, codes)] + + self._qa_collection.add( + documents=qa_str, + metadatas=metadatas, + ids=ids, + ) + + def add_docs( + self, + docs: Iterable[str], + ids: Optional[Iterable[str]] = None, + metadatas: Optional[List[dict]] = None, + ) -> List[str]: + """ + Add docs to the training set + Args: + docs: Iterable of strings to add to the vectorstore. + ids: Optional Iterable of ids associated with the texts. + metadatas: Optional list of metadatas associated with the texts. + kwargs: vectorstore specific parameters + + Returns: + List of ids from adding the texts into the vectorstore. + """ + if ids is None: + ids = [f"{str(uuid.uuid4())}-docs" for _ in docs] + self._docs_collection.add( + documents=docs, + metadatas=metadatas, + ids=ids, + ) + + def update_question_answer( + self, + ids: Iterable[str], + queries: Iterable[str], + codes: Iterable[str], + metadatas: Optional[List[dict]] = None, + ) -> List[str]: + """ + Update question and answer(code) to the training set + Args: + ids: Iterable of ids associated with the texts. + queries: string of question + codes: str + metadatas: Optional list of metadatas associated with the texts. + kwargs: vectorstore specific parameters + Returns: + List of ids from updating the texts into the vectorstore. 
+ """ + if len(queries) != len(codes): + raise ValueError( + f"Queries and codes dimension doesn't match {len(queries)} != {len(codes)}" + ) + + qa_str = [self._format_qa(query, code) for query, code in zip(queries, codes)] + self._qa_collection.update( + documents=qa_str, + metadatas=metadatas, + ids=ids, + ) + + def update_docs( + self, + ids: Iterable[str], + docs: Iterable[str], + metadatas: Optional[List[dict]] = None, + ) -> List[str]: + """ + Update docs to the training set + Args: + ids: Iterable of ids associated with the texts. + docs: Iterable of strings to update to the vectorstore. + metadatas: Optional list of metadatas associated with the texts. + kwargs: vectorstore specific parameters + + Returns: + List of ids from adding the texts into the vectorstore. + """ + self._docs_collection.update( + documents=docs, + metadatas=metadatas, + ids=ids, + ) + + def delete_question_and_answers( + self, ids: Optional[List[str]] = None + ) -> Optional[bool]: + """ + Delete by vector ID to delete question and answers + Args: + ids: List of ids to delete + + Returns: + Optional[bool]: True if deletion is successful, + False otherwise + """ + self._qa_collection.delete(ids=ids) + return True + + def delete_docs(self, ids: Optional[List[str]] = None) -> Optional[bool]: + """ + Delete by vector ID to delete docs + Args: + ids: List of ids to delete + + Returns: + Optional[bool]: True if deletion is successful, + False otherwise + """ + self._docs_collection.delete(ids=ids) + return True + + def get_relevant_question_answers( + self, question: str, k: Union[int, None] = None + ) -> List[dict]: + """ + Returns relevant question answers based on search + """ + k = k or self._max_samples + + relevant_data: chromadb.QueryResult = self._qa_collection.query( + query_texts=question, + n_results=k, + include=["metadatas", "documents", "distances"], + ) + + return self._filter_docs_based_on_distance( + relevant_data, self._similarity_threshold + ) + + def 
get_relevant_docs(self, question: str, k: int = None) -> List[dict]: + """ + Returns relevant documents based search + """ + k = k or self._max_samples + + relevant_data: chromadb.QueryResult = self._docs_collection.query( + query_texts=question, + n_results=k, + include=["metadatas", "documents", "distances"], + ) + + return self._filter_docs_based_on_distance( + relevant_data, self._similarity_threshold + ) + + def get_relevant_question_answers_by_id(self, ids: Iterable[str]) -> List[dict]: + """ + Returns relevant question answers based on ids + """ + + relevant_data: chromadb.QueryResult = self._qa_collection.get( + ids=ids, + include=["metadatas", "documents"], + ) + + return relevant_data + + def get_relevant_docs_by_id(self, ids: Iterable[str]) -> List[dict]: + """ + Returns relevant question answers based on ids + """ + + relevant_data: chromadb.QueryResult = self._docs_collection.get( + ids=ids, + include=["metadatas", "documents"], + ) + + return relevant_data + + def get_relevant_qa_documents(self, question: str, k: int = None) -> List[str]: + """ + Returns relevant question answers documents only + Args: + question (_type_): list of documents + """ + return self.get_relevant_question_answers(question, k)["documents"][0] + + def get_relevant_docs_documents(self, question: str, k: int = None) -> List[str]: + """ + Returns relevant question answers documents only + Args: + question (_type_): list of documents + """ + return self.get_relevant_docs(question, k)["documents"][0] + + def _filter_docs_based_on_distance( + self, documents: chromadb.QueryResult, threshold: int + ) -> List[str]: + """ + Filter documents based on threshold + Args: + documents (List[str]): list of documents in string + distances (List[float]): list of distances in float + threshold (int): similarity threshold + + Returns: + _type_: _description_ + """ + filtered_data = [ + (doc, distance, metadata, ids) + for doc, distance, metadata, ids in zip( + documents["documents"][0], + 
documents["distances"][0], + documents["metadatas"][0], + documents["ids"][0], + ) + if distance < threshold + ] + + return { + key: [[data[i] for data in filtered_data]] + for i, key in enumerate(["documents", "distances", "metadatas", "ids"]) + } diff --git a/pandasai/exceptions.py b/pandasai/exceptions.py index ab11ddf59..610690fbf 100644 --- a/pandasai/exceptions.py +++ b/pandasai/exceptions.py @@ -6,7 +6,6 @@ class InvalidRequestError(Exception): - """ Raised when the request is not successful. @@ -16,7 +15,6 @@ class InvalidRequestError(Exception): class APIKeyNotFoundError(Exception): - """ Raised when the API key is not defined/declared. @@ -186,3 +184,45 @@ class InvalidLLMOutputType(Exception): Args: Exception (Exception): InvalidLLMOutputType """ + + +class ExecuteSQLQueryNotUsed(Exception): + """ + Raise error if Execute SQL Query is not used + Args: + Exception (Exception): ExecuteSQLQueryNotUsed + """ + + +class MissingVectorStoreError(Exception): + """ + Raise error if vector store is not found + Args: + Exception (Exception): MissingVectorStoreError + """ + + +class PandasAIApiKeyError(Exception): + """ + Raise error if api key is not found for remote vectorstore and llm + Args: + Exception (Exception): PandasAIApiKeyError + """ + + def __init__(self): + message = ( + "The api_key client option must be set either by passing api_key to the client " + "or by setting the PANDASAI_API_KEY environment variable. To get the key follow below steps:\n" + "1. Go to https://domer.ai and sign up\n" + "2. From settings go to API keys and copy\n" + "3. 
Set environment variable like os.environ['PANDASAI_API_KEY'] = '$2a$10$flb7....'" + ) + super().__init__(message) + + +class PandasAIApiCallError(Exception): + """ + Raise error if exception in API request fails + Args: + Exception (Exception): PandasAIApiCallError + """ diff --git a/pandasai/helpers/cache.py b/pandasai/helpers/cache.py index a735cf4eb..9b1e4a1c4 100644 --- a/pandasai/helpers/cache.py +++ b/pandasai/helpers/cache.py @@ -97,7 +97,6 @@ def get_cache_key(self, context: Any) -> str: # make the cache key unique for each combination of dfs for df in context.dfs: - hash = df.column_hash() - cache_key += str(hash) + cache_key += str(df.column_hash) return cache_key diff --git a/pandasai/helpers/code_manager.py b/pandasai/helpers/code_manager.py index 39f941375..f99c61571 100644 --- a/pandasai/helpers/code_manager.py +++ b/pandasai/helpers/code_manager.py @@ -13,9 +13,12 @@ from pandasai.helpers.skills_manager import SkillsManager from pandasai.helpers.sql import extract_table_names +from ..connectors import BaseConnector +from ..connectors.sql import SQLConnector from ..constants import WHITELISTED_BUILTINS, WHITELISTED_LIBRARIES from ..exceptions import ( BadImportError, + ExecuteSQLQueryNotUsed, InvalidConfigError, MaliciousQueryError, NoResultFoundError, @@ -39,16 +42,26 @@ def __init__( prompt_id (uuid.UUID): Prompt ID skills_manager (SkillsManager): Skills Manager """ - self._skills_manager = skills_manager - self._prompt_id = prompt_id + self.skills_manager = skills_manager + self.prompt_id = prompt_id - @property - def prompt_id(self): - return self._prompt_id - @property - def skills_manager(self): - return self._skills_manager +class FunctionCallVisitor(ast.NodeVisitor): + """ + Iterate over the code to find function calls + """ + + def __init__(self): + self.function_calls = [] + + def visit_Call(self, node): + if isinstance(node.func, ast.Name): + self.function_calls.append(node.func.id) + elif isinstance(node.func, ast.Attribute) and 
isinstance( + node.func.value, ast.Name + ): + self.function_calls.append(f"{node.func.value.id}.{node.func.attr}") + self.generic_visit(node) class CodeManager: @@ -87,6 +100,7 @@ def __init__( self._dfs = dfs self._config = config self._logger = logger + self._function_call_vistor = FunctionCallVisitor() def _required_dfs(self, code: str) -> List[str]: """ @@ -103,7 +117,7 @@ def _required_dfs(self, code: str) -> List[str]: # Sometimes GPT-3.5/4 use a for loop to iterate over the dfs (even if there is only one) # or they concatenate the dfs. In this case we need all the dfs - if "for df in dfs" in code or "pd.concat(dfs)" in code: + if "for df in dfs" in code or "pd.concat(dfs" in code: return self._dfs required_dfs = [] @@ -112,7 +126,7 @@ def _required_dfs(self, code: str) -> List[str]: required_dfs.append(df) else: required_dfs.append(None) - return required_dfs + return required_dfs or self._dfs def _replace_plot_png(self, code): """ @@ -124,25 +138,6 @@ def _replace_plot_png(self, code): """ return re.sub(r"""(['"])([^'"]*\.png)\1""", r"\1temp_chart.png\1", code) - def _validate_direct_sql(self, dfs: List) -> bool: - """ - Raises error if they don't belong sqlconnector or have different credentials - Args: - dfs (List[SmartDataframe]): list of SmartDataframes - - Raises: - InvalidConfigError: Raise Error in case of config is set but criteria is not met - """ - if self._config.direct_sql and all(df.is_connector() for df in dfs): - if all(df == dfs[0] for df in dfs): - return True - else: - raise InvalidConfigError( - "Direct requires all SQLConnector and they belong to same datasource " - "and have same credentials" - ) - return False - def execute_code(self, code: str, context: CodeExecutionContext) -> Any: """ Execute the python code generated by LLMs to answer the question @@ -199,8 +194,8 @@ def execute_code(self, code: str, context: CodeExecutionContext) -> Any: environment: dict = self._get_environment() environment["dfs"] = self._get_originals(dfs) - 
if self._validate_direct_sql(self._dfs): - environment["execute_sql_query"] = self._dfs[0].get_query_exec_func() + if self._config.direct_sql: + environment["execute_sql_query"] = self._dfs[0].execute_direct_sql_query # Add skills to the env if context.skills_manager.used_skills: @@ -233,13 +228,14 @@ def _get_originals(self, dfs): original_dfs.append(None) continue - if df.has_connector: - extracted_filters = self._extract_filters(self._current_code_executed) - filters = extracted_filters.get(f"dfs[{index}]", []) - df.connector.set_additional_filters(filters) - df.load_connector(temporary=len(filters) > 0) + extracted_filters = self._extract_filters(self._current_code_executed) + filters = extracted_filters.get(f"dfs[{index}]", []) + df.set_additional_filters(filters) + + df.execute() + # df.load_connector(partial=len(filters) > 0) - original_dfs.append(df.dataframe) + original_dfs.append(df.pandas_df) return original_dfs @@ -252,9 +248,11 @@ def _get_environment(self) -> dict: return { "pd": pd, **{ - lib["alias"]: getattr(import_dependency(lib["module"]), lib["name"]) - if hasattr(import_dependency(lib["module"]), lib["name"]) - else import_dependency(lib["module"]) + lib["alias"]: ( + getattr(import_dependency(lib["module"]), lib["name"]) + if hasattr(import_dependency(lib["module"]), lib["name"]) + else import_dependency(lib["module"]) + ) for lib in self._additional_dependencies }, "__builtins__": { @@ -315,7 +313,7 @@ def _is_unsafe(self, node: ast.stmt) -> bool: def find_function_calls(self, node: ast.AST, context: CodeExecutionContext): if isinstance(node, ast.Call): if isinstance(node.func, ast.Name): - if context.skills_manager.skill_exists: + if context.skills_manager.skill_exists(node.func.id): context.skills_manager.add_used_skill(node.func.id) elif isinstance(node.func, ast.Attribute) and isinstance( node.func.value, ast.Name @@ -334,6 +332,31 @@ def check_direct_sql_func_def_exists(self, node: ast.AST): and node.name == "execute_sql_query" ) + def 
check_skill_func_def_exists(self, node: ast.AST, context: CodeExecutionContext): + return isinstance( + node, ast.FunctionDef + ) and context.skills_manager.skill_exists(node.name) + + def _validate_direct_sql(self, dfs: List[BaseConnector]) -> bool: + """ + Raises error if they don't belong sqlconnector or have different credentials + Args: + dfs (List[BaseConnector]): list of BaseConnectors + + Raises: + InvalidConfigError: Raise Error in case of config is set but criteria is not met + """ + + if self._config.direct_sql: + if all((isinstance(df, SQLConnector) and df.equals(dfs[0])) for df in dfs): + return True + else: + raise InvalidConfigError( + "Direct requires all SQLConnector and they belong to same datasource " + "and have same credentials" + ) + return False + def _get_sql_irrelevant_tables(self, node: ast.Assign): for target in node.targets: if ( @@ -344,7 +367,7 @@ def _get_sql_irrelevant_tables(self, node: ast.Assign): ): sql_query = node.value.value table_names = extract_table_names(sql_query) - allowed_table_names = [df.table_name for df in self._dfs] + allowed_table_names = [df.name for df in self._dfs] return [ table_name for table_name in table_names @@ -370,6 +393,11 @@ def _clean_code(self, code: str, context: CodeExecutionContext) -> str: # Check for imports and the node where analyze_data is defined new_body = [] + execute_sql_query_used = False + + # find function calls + self._function_call_vistor.visit(tree) + for node in tree.body: if isinstance(node, (ast.Import, ast.ImportFrom)): self._check_imports(node) @@ -387,6 +415,16 @@ def _clean_code(self, code: str, context: CodeExecutionContext) -> str: if self.check_direct_sql_func_def_exists(node): continue + if self.check_skill_func_def_exists(node, context): + continue + + # if generated code contain execute_sql_query usage + if ( + self._validate_direct_sql(self._dfs) + and "execute_sql_query" in self._function_call_vistor.function_calls + ): + execute_sql_query_used = True + # Sanity for 
sql query the code should only use allowed tables if ( isinstance(node, ast.Assign) @@ -401,6 +439,12 @@ def _clean_code(self, code: str, context: CodeExecutionContext) -> str: new_body.append(node) + # Enforcing use of execute_sql_query via Error Prompt Pipeline + if self._config.direct_sql and not execute_sql_query_used: + raise ExecuteSQLQueryNotUsed( + "For Direct SQL set to true, execute_sql_query function must be used. Generating Error Prompt!!!" + ) + new_tree = ast.Module(body=new_body) return astor.to_source(new_tree, pretty_source=lambda x: "".join(x)).strip() @@ -503,6 +547,9 @@ def _tokenize_operand(operand_node: ast.expr) -> Generator[str, None, None]: >>> print(list(res)) ['foo', 2, 1, 0] """ + if isinstance(operand_node, ast.Call): + yield operand_node.func.attr + if isinstance(operand_node, ast.Subscript): slice_ = operand_node.slice.value yield from CodeManager._tokenize_operand(operand_node.value) @@ -582,49 +629,24 @@ def _extract_comparisons(self, tree: ast.Module) -> dict[str, list]: call_visitor = CallVisitor() call_visitor.visit(tree) - calls = call_visitor.call_nodes for node in ast.walk(tree): - if isinstance(node, ast.Compare): - is_call_on_left = isinstance(node.left, ast.Call) - is_polars = False - is_calling_col = False - try: - is_polars = node.left.func.value.id in ("pl", "polars") - is_calling_col = node.left.func.attr == "col" - except AttributeError: - pass - - if is_call_on_left and is_polars and is_calling_col: - df_name = self._get_nearest_func_call( - node.lineno, calls, "filter" - ).func.value.id - current_df = self._get_df_id_by_nearest_assignment( - node.lineno, assignments, df_name + if isinstance(node, ast.Compare) and isinstance(node.left, ast.Subscript): + name, *slices = self._tokenize_operand(node.left) + current_df = ( + self._get_df_id_by_nearest_assignment( + node.lineno, assignments, name ) - left_str = node.left.args[0].value - - for op, right in zip(node.ops, node.comparators): - op_str = 
self._ast_comparator_map.get(type(op), "Unknown") - right_str = right.value - - comparisons[current_df].append((left_str, op_str, right_str)) - elif isinstance(node.left, ast.Subscript): - name, *slices = self._tokenize_operand(node.left) - current_df = ( - self._get_df_id_by_nearest_assignment( - node.lineno, assignments, name - ) - or current_df - ) - left_str = slices[-1] if slices else name + or current_df + ) + left_str = slices[-1] if slices else name - for op, right in zip(node.ops, node.comparators): - op_str = self._ast_comparator_map.get(type(op), "Unknown") - name, *slices = self._tokenize_operand(right) - right_str = slices[-1] if slices else name + for op, right in zip(node.ops, node.comparators): + op_str = self._ast_comparator_map.get(type(op), "Unknown") + name, *slices = self._tokenize_operand(right) + right_str = slices[-1] if slices else name - comparisons[current_df].append((left_str, op_str, right_str)) + comparisons[current_df].append((left_str, op_str, right_str)) return comparisons def _extract_filters(self, code) -> dict[str, list]: @@ -664,7 +686,7 @@ def _extract_filters(self, code) -> dict[str, list]: "Unable to extract filters for passed code", level=logging.ERROR ) self._logger.log(f"{traceback.format_exc()}", level=logging.DEBUG) - raise + return {} return filters diff --git a/pandasai/helpers/data_sampler.py b/pandasai/helpers/data_sampler.py index ed55fb0e1..3a6915fe7 100644 --- a/pandasai/helpers/data_sampler.py +++ b/pandasai/helpers/data_sampler.py @@ -14,17 +14,14 @@ import pandasai.pandas as pd from .anonymizer import Anonymizer -from .df_info import DataFrameType, df_type class DataSampler: - def __init__(self, df: DataFrameType): + def __init__(self, df: pd.DataFrame): """ Args: - df (SmartDataframe): SmartDataframe to sample from. + df (pd.DataFrame): pd.DataFrame to sample from. 
""" - if df_type(df) == "polars": - df = df.to_pandas() self.df = df def sample(self, n: int = 3) -> pd.DataFrame: @@ -34,7 +31,7 @@ def sample(self, n: int = 3) -> pd.DataFrame: n (int, optional): Number of rows to sample. Defaults to 5. Returns: - DataFrameType: Sampled dataframe. + pd.DataFrame: Sampled dataframe. """ sampled_df = pd.DataFrame() if len(self.df) <= n: diff --git a/pandasai/helpers/dataframe_serializer.py b/pandasai/helpers/dataframe_serializer.py new file mode 100644 index 000000000..497a8f933 --- /dev/null +++ b/pandasai/helpers/dataframe_serializer.py @@ -0,0 +1,143 @@ +import json +from enum import Enum + +import pandas as pd +import yaml + + +class DataframeSerializerType(Enum): + JSON = 1 + YML = 2 + CSV = 3 + SQL = 4 + + +class DataframeSerializer: + def __init__(self) -> None: + pass + + def serialize( + self, + df: pd.DataFrame, + extras: dict = None, + type_: DataframeSerializerType = DataframeSerializerType.YML, + ) -> str: + if type_ == DataframeSerializerType.YML: + return self.convert_df_to_yml(df, extras) + elif type_ == DataframeSerializerType.JSON: + return self.convert_df_to_json_str(df, extras) + elif type_ == DataframeSerializerType.SQL: + return self.convert_df_sql_connector_to_str(df, extras) + else: + return self.convert_df_to_csv(df, extras) + + def convert_df_to_csv(self, df: pd.DataFrame, extras: dict) -> str: + """ + Convert df to csv like format where csv is wrapped inside + Args: + df (pd.DataFrame): PandasAI dataframe or dataframe + extras (dict, optional): expect index to exists + + Returns: + str: dataframe stringify + """ + dataframe_info = " str: + """ + Convert df to csv like format where csv is wrapped inside
+ Args: + df (pd.DataFrame): PandasAI dataframe or dataframe + extras (dict, optional): expect index to exists + + Returns: + str: dataframe stringify + """ + table_description_tag = ( + f' description="{df.description}"' if df.description is not None else "" + ) + table_head_tag = f'' + return f"{table_head_tag}\n{df.head_df.to_csv()}\n
" + + def convert_df_to_json(self, df: pd.DataFrame, extras: dict) -> dict: + """ + Convert df to json dictionary and return json + Args: + df (pd.DataFrame): PandasAI dataframe or dataframe + extras (dict, optional): expect index to exists + + Returns: + str: dataframe json + """ + # Initialize the result dictionary + df_number_key = f"dfs[{extras['index']}]" + + # Create a dictionary representing the data structure + df_info = { + "name": df.name, + "description": df.description, + "type": extras["type"], + } + # Add DataFrame details to the result + data = { + "rows": df.rows_count, + "columns": df.columns_count, + "schema": {"fields": []}, + } + + # Iterate over DataFrame columns + df_head = df.get_head() + for col_name, col_dtype in df_head.dtypes.items(): + col_info = { + "name": col_name, + "type": str(col_dtype), + "samples": df_head[col_name].head().tolist(), + } + + # Add column description if available + if df.field_descriptions and isinstance(df.field_descriptions, dict): + if col_description := df.field_descriptions.get(col_name, None): + col_info["description"] = col_description + + data["schema"]["fields"].append(col_info) + + result = df_info | data + + return {df_number_key: result} + + def convert_df_to_json_str(self, df: pd.DataFrame, extras: dict) -> str: + """ + Convert df to json and return it as string + Args: + df (pd.DataFrame): PandasAI dataframe or dataframe + extras (dict, optional): expect index to exists + + Returns: + str: dataframe stringify + """ + return json.dumps(self.convert_df_to_json(df, extras)) + + def convert_df_to_yml(self, df: pd.DataFrame, extras: dict) -> str: + json_df = self.convert_df_to_json(df, extras) + + return yaml.dump(json_df, sort_keys=False) diff --git a/pandasai/helpers/file_importer.py b/pandasai/helpers/file_importer.py new file mode 100644 index 000000000..7be869f00 --- /dev/null +++ b/pandasai/helpers/file_importer.py @@ -0,0 +1,32 @@ +import pandas as pd + +from .from_google_sheets import 
from_google_sheets + + +class FileImporter: + """ + Class to import a dataframe from a file (csv, parquet, xlsx) + """ + + @staticmethod + def import_from_file(file_path: str) -> pd.DataFrame: + """ + Import a dataframe from a file (csv, parquet, xlsx) + + Args: + file_path (str): Path to the file to be imported. + + Returns: + pd.DataFrame: Pandas dataframe + """ + + if file_path.endswith(".csv"): + return pd.read_csv(file_path) + elif file_path.endswith(".parquet"): + return pd.read_parquet(file_path) + elif file_path.endswith(".xlsx"): + return pd.read_excel(file_path) + elif file_path.startswith("https://docs.google.com/spreadsheets/"): + return from_google_sheets(file_path)[0] + else: + raise ValueError("Invalid file format.") diff --git a/pandasai/helpers/folder.py b/pandasai/helpers/folder.py new file mode 100644 index 000000000..3235b5b36 --- /dev/null +++ b/pandasai/helpers/folder.py @@ -0,0 +1,27 @@ +import os + +from pydantic import BaseModel + +from pandasai.constants import DEFAULT_FILE_PERMISSIONS + +from ..helpers.path import find_project_root + + +class FolderConfig(BaseModel): + permissions: str = DEFAULT_FILE_PERMISSIONS + exist_ok: bool = True + + +class Folder: + @staticmethod + def create(path, config: FolderConfig = FolderConfig()): + """Create a folder if it does not exist. + + Args: + path (str): Path to the folder to be created. 
+ """ + try: + cache_dir = os.path.join((find_project_root()), path) + except ValueError: + cache_dir = os.path.join(os.getcwd(), path) + os.makedirs(cache_dir, mode=config.permissions, exist_ok=config.exist_ok) diff --git a/pandasai/helpers/memory.py b/pandasai/helpers/memory.py index e12133ccc..07834a292 100644 --- a/pandasai/helpers/memory.py +++ b/pandasai/helpers/memory.py @@ -7,10 +7,12 @@ class Memory: _messages: list _memory_size: int + _agent_info: str - def __init__(self, memory_size: int = 1): + def __init__(self, memory_size: int = 1, agent_info: Union[str, None] = None): self._messages = [] self._memory_size = memory_size + self._agent_info = agent_info def add(self, message: str, is_user: bool): self._messages.append({"message": message, "is_user": is_user}) @@ -40,7 +42,7 @@ def get_messages(self, limit: int = None) -> list: limit = self._memory_size if limit is None else limit return [ - f"{'Q' if message['is_user'] else 'A'}: {message['message'] if message['is_user'] else self._truncate(message['message'])}" + f"{'### QUERY' if message['is_user'] else '### ANSWER'}\n {message['message'] if message['is_user'] else self._truncate(message['message'])}" for message in self._messages[-limit:] ] @@ -65,9 +67,25 @@ def get_last_message(self) -> str: messages = self.get_messages(self._memory_size) return "" if len(messages) == 0 else messages[-1] + def get_system_prompt(self) -> str: + return self._agent_info + + def to_json(self): + messages = [] + for message in self.all(): + if message["is_user"]: + messages.append({"role": "user", "message": message["message"]}) + else: + messages.append({"role": "assistant", "message": message["message"]}) + return messages + def clear(self): self._messages = [] @property def size(self): return self._memory_size + + @property + def agent_info(self): + return self._agent_info diff --git a/pandasai/helpers/openai_info.py b/pandasai/helpers/openai_info.py index c732059b5..2ba2f97e9 100644 --- 
a/pandasai/helpers/openai_info.py +++ b/pandasai/helpers/openai_info.py @@ -48,8 +48,8 @@ "gpt-35-turbo-16k-completion": 0.004, "gpt-35-turbo-16k-0613-completion": 0.004, # Fine-tuned input - "gpt-3.5-turbo-0613-finetuned": 0.008, - "gpt-3.5-turbo-1106-finetuned": 0.008, + "gpt-3.5-turbo-0613-finetuned": 0.012, + "gpt-3.5-turbo-1106-finetuned": 0.012, # Fine-tuned output "gpt-3.5-turbo-0613-finetuned-completion": 0.016, "gpt-3.5-turbo-1106-finetuned-completion": 0.016, diff --git a/pandasai/helpers/output_validator.py b/pandasai/helpers/output_validator.py new file mode 100644 index 000000000..d6cb559b0 --- /dev/null +++ b/pandasai/helpers/output_validator.py @@ -0,0 +1,62 @@ +import re +from typing import Any, Iterable + +import pandas as pd + + +class OutputValidator: + @staticmethod + def validate(expected_type, result: dict[str, Any]) -> tuple[bool, Iterable[str]]: + """ + Validate 'type' and 'value' from the result dict. + + Args: + result (dict[str, Any]): The result of code execution in + dict representation. Should have the following schema: + { + "type": , + "value": + } + + Returns: + (tuple(bool, Iterable(str)): + Boolean value whether the result matches output type + and collection of logs containing messages about + 'type' or 'value' mismatches. + """ + validation_logs = [] + result_type, result_value = result.get("type"), result.get("value") + + type_ok = OutputValidator.validate_type(result_type, expected_type) + if not type_ok: + validation_logs.append( + f"The result dict contains inappropriate 'type'. " + f"Expected '{expected_type}', actual '{result_type}'." + ) + value_ok = OutputValidator.validate_value(result_value, expected_type) + if not value_ok: + validation_logs.append( + f"result value {repr(result_value)} seems to be inappropriate " + f"for the type '{expected_type}'." 
+ ) + + return all((type_ok, value_ok)), validation_logs + + def validate_type(self, expected_type: str) -> bool: + return self == expected_type if expected_type else True + + def validate_value(self, expected_type: str) -> bool: + if not expected_type: + return True + elif expected_type == "number": + return isinstance(self, (int, float)) + elif expected_type == "string": + return isinstance(self, str) + elif expected_type == "dataframe": + return isinstance(self, (pd.DataFrame, pd.Series)) + elif expected_type == "plot": + if not isinstance(self, str): + return False + + path_to_plot_pattern = r"^(\/[\w.-]+)+(/[\w.-]+)*$|^[^\s/]+(/[\w.-]+)*$" + return bool(re.match(path_to_plot_pattern, self)) diff --git a/pandasai/helpers/query_exec_tracker.py b/pandasai/helpers/query_exec_tracker.py index 323a022d3..5b538ca9f 100644 --- a/pandasai/helpers/query_exec_tracker.py +++ b/pandasai/helpers/query_exec_tracker.py @@ -2,12 +2,17 @@ import json import os import time -import uuid from collections import defaultdict from typing import Any, List, TypedDict, Union import requests +from pandasai.connectors import BaseConnector +from pandasai.pipelines.chat.chat_pipeline_input import ( + ChatPipelineInput, +) +from pandasai.pipelines.pipeline_context import PipelineContext + class ResponseType(TypedDict): type: str @@ -16,10 +21,10 @@ class ResponseType(TypedDict): exec_steps = { "cache_hit": "Cache Hit", - "_get_prompt": "Generate Prompt", + "get_prompt": "Generate Prompt", "generate_code": "Generate Code", "execute_code": "Code Execution", - "_retry_run_code": "Retry Code Generation", + "retry_run_code": "Retry Code Generation", "parse": "Parse Output", } @@ -27,6 +32,7 @@ class ResponseType(TypedDict): class QueryExecTracker: _query_info: dict _dataframes: List + _skills: List _response: ResponseType _steps: List _func_exec_count: dict @@ -42,66 +48,44 @@ def __init__( self._start_time = None self._server_config = server_config self._query_info = {} - self._is_related_query 
= True - - def set_related_query(self, flag: bool): - """ - Set Related Query Parameter whether new query is related to the conversation - or not - Args: - flag (bool): boolean to set true if related else false - """ - self._is_related_query = flag - - def add_query_info( - self, - conversation_id: uuid.UUID, - instance: str, - query: str, - output_type: str, - ): - """ - Adds query information for new track - Args: - conversation_id (str): conversation id - instance (str): instance like Agent or SmartDataframe - query (str): chat query given by user - output_type (str): output type expected by user - """ - self._query_info = { - "conversation_id": str(conversation_id), - "instance": instance, - "query": query, - "output_type": output_type, - "is_related_query": self._is_related_query, - } - def start_new_track(self): + def start_new_track(self, input: ChatPipelineInput): """ Resets tracking variables to start new track """ self._last_log_id = None self._start_time = time.time() self._dataframes: List = [] + self._skills: List = [] self._response: ResponseType = {} self._steps: List = [] self._query_info = {} self._func_exec_count: dict = defaultdict(int) + self._query_info = { + "conversation_id": str(input.conversation_id), + "instance": "Agent", + "query": input.query, + "output_type": input.output_type, + } + def convert_dataframe_to_dict(self, df): json_data = json.loads(df.to_json(orient="split", date_format="iso")) return {"headers": json_data["columns"], "rows": json_data["data"]} - def add_dataframes(self, dfs: List) -> None: + def add_dataframes(self, dfs: List[BaseConnector]) -> None: """ Add used dataframes for the query to query exec tracker Args: - dfs (List[SmartDataFrame]): List of dataframes + dfs (List[BaseConnector]): List of dataframes """ for df in dfs: - head = df.head_df + head = df.get_schema() self._dataframes.append(self.convert_dataframe_to_dict(head)) + def add_skills(self, context: PipelineContext): + self._skills = 
context.skills_manager.to_object() + def add_step(self, step: dict) -> None: """ Add Custom Step that is performed for additional information @@ -110,6 +94,9 @@ def add_step(self, step: dict) -> None: """ self._steps.append(step) + def set_final_response(self, response: Any): + self._response = response + def execute_func(self, function, *args, **kwargs) -> Any: """ Tracks function executions, calculates execution time and prepare data @@ -164,16 +151,16 @@ def _generate_exec_step(self, func_name: str, result: Any) -> dict: step = {"type": exec_steps[func_name]} - if func_name == "_get_prompt": + if func_name == "get_prompt": step["prompt_class"] = result.__class__.__name__ step["generated_prompt"] = result.to_string() - elif func_name == "_retry_run_code": - self._func_exec_count["_retry_run_code"] += 1 + elif func_name == "retry_run_code": + self._func_exec_count["retry_run_code"] += 1 step[ "type" - ] = f"{exec_steps[func_name]} ({self._func_exec_count['_retry_run_code']})" + ] = f"{exec_steps[func_name]} ({self._func_exec_count['retry_run_code']})" step["code_generated"] = result elif func_name in {"cache_hit", "generate_code"}: @@ -224,6 +211,7 @@ def get_summary(self) -> dict: execution_time = time.time() - self._start_time return { "query_info": self._query_info, + "skills": self._skills, "dataframes": self._dataframes, "steps": self._steps, "response": self._response, @@ -242,23 +230,24 @@ def publish(self) -> None: server_url = None if self._server_config is None: - server_url = os.environ.get("LOGGING_SERVER_URL") or None - api_key = os.environ.get("LOGGING_SERVER_API_KEY") or None + server_url = os.environ.get("PANDASAI_API_URL", "https://api.domer.ai") + api_key = os.environ.get("PANDASAI_API_KEY") or None else: server_url = self._server_config.get( - "server_url", os.environ.get("LOGGING_SERVER_URL") + "server_url", os.environ.get("PANDASAI_API_URL", "https://api.domer.ai") ) api_key = self._server_config.get( - "api_key", 
os.environ.get("LOGGING_SERVER_API_KEY") + "api_key", os.environ.get("PANDASAI_API_KEY") ) - if api_key is None or server_url is None: + if api_key is None: return try: log_data = { "json_log": self.get_summary(), } + headers = {"Authorization": f"Bearer {api_key}"} response = requests.post( f"{server_url}/api/log/add", json=log_data, headers=headers diff --git a/pandasai/helpers/request.py b/pandasai/helpers/request.py new file mode 100644 index 000000000..723ad0fdd --- /dev/null +++ b/pandasai/helpers/request.py @@ -0,0 +1,77 @@ +import logging +import os +import traceback +from urllib.parse import urljoin + +import requests + +from pandasai.exceptions import PandasAIApiCallError, PandasAIApiKeyError +from pandasai.helpers.logger import Logger + + +class Session: + _api_key: str + _endpoint_url: str + _logger: Logger + + def __init__( + self, endpoint_url: str = None, api_key: str = None, logger: Logger = None + ) -> None: + if api_key is None: + api_key = os.environ.get("PANDASAI_API_KEY") or None + if api_key is None: + raise PandasAIApiKeyError() + self._api_key = api_key + + if endpoint_url is None: + endpoint_url = os.environ.get("PANDASAI_API_URL", "https://api.domer.ai") + + self._endpoint_url = endpoint_url + self._version_path = "/api" + self._logger = logger or Logger() + + def get(self, path=None, **kwargs): + return self.make_request("GET", path, **kwargs)["data"] + + def post(self, path=None, **kwargs): + return self.make_request("POST", path, **kwargs) + + def patch(self, path=None, **kwargs): + return self.make_request("PATCH", path, **kwargs) + + def put(self, path=None, **kwargs): + return self.make_request("PUT", path, **kwargs) + + def delete(self, path=None, **kwargs): + return self.make_request("DELETE", path, **kwargs) + + def make_request( + self, method, path, headers=None, params=None, data=None, json=None, timeout=300 + ): + try: + url = urljoin(self._endpoint_url, self._version_path + path) + if headers is None: + headers = { + 
"Authorization": f"Bearer {self._api_key}", + "Content-Type": "application/json", # or any other headers you need + } + + response = requests.request( + method, + url, + headers=headers, + params=params, + data=data, + json=json, + timeout=timeout, + ) + + data = response.json() + if response.status_code == 400: + raise PandasAIApiCallError(data["message"]) + + return data + + except requests.exceptions.RequestException as e: + self._logger.log(f"Request failed: {traceback.format_exc()}", logging.ERROR) + raise PandasAIApiCallError(f"Request failed: {e}") from e diff --git a/pandasai/helpers/skills_manager.py b/pandasai/helpers/skills_manager.py index 14386c1f8..b99dbcfbd 100644 --- a/pandasai/helpers/skills_manager.py +++ b/pandasai/helpers/skills_manager.py @@ -1,4 +1,4 @@ -from typing import List, Optional +from typing import Optional from pandasai.skills import Skill @@ -8,12 +8,9 @@ class SkillsManager: Manages Custom added Skills and tracks used skills for the query """ - _skills: List - _used_skills: List[str] - def __init__(self) -> None: - self._skills = [] - self._used_skills = [] + self.skills = [] + self.used_skills = [] def add_skills(self, *skills: Skill): """ @@ -24,12 +21,10 @@ def add_skills(self, *skills: Skill): *skills: Variable number of skill objects to add. """ for skill in skills: - if any( - existing_skill.name == skill.name for existing_skill in self._skills - ): + if any(existing_skill.name == skill.name for existing_skill in self.skills): raise ValueError(f"Skill with name '{skill.name}' already exists.") - self._skills.extend(skills) + self.skills.extend(skills) def skill_exists(self, name: str): """ @@ -41,7 +36,16 @@ def skill_exists(self, name: str): Returns: bool: True if a skill with the given name exists, False otherwise. """ - return any(skill.name == name for skill in self._skills) + return any(skill.name == name for skill in self.skills) + + def has_skills(self): + """ + Check if there are any skills in the list of skills. 
+ + Returns: + bool: True if there are skills, False otherwise. + """ + return len(self.skills) > 0 def get_skill_by_func_name(self, name: str): """ @@ -53,11 +57,11 @@ def get_skill_by_func_name(self, name: str): Returns: Skill or None: The skill with the given name, or None if not found. """ - return next((skill for skill in self._skills if skill.name == name), None) + return next((skill for skill in self.skills if skill.name == name), None) def add_used_skill(self, skill: str): if self.skill_exists(skill): - self._used_skills.append(skill) + self.used_skills.append(skill) def __str__(self) -> str: """ @@ -65,25 +69,16 @@ def __str__(self) -> str: Returns: str: _description_ """ - return "".join(str(skill) for skill in self._skills) + return "".join(str(skill) for skill in self.skills) def prompt_display(self) -> Optional[str]: """ Displays skills for prompt """ - if len(self._skills) == 0: + if len(self.skills) == 0: return None return f"You can call the following functions that have been pre-defined for you:\n{self}" - @property - def used_skills(self): - return self._used_skills - - @used_skills.setter - def used_skills(self, value): - self._used_skills = value - - @property - def skills(self): - return self._skills + def to_object(self) -> str: + return [skill.stringify() for skill in self.skills] diff --git a/pandasai/helpers/viz_library_types/__init__.py b/pandasai/helpers/viz_library_types/__init__.py deleted file mode 100644 index 0c8a75e92..000000000 --- a/pandasai/helpers/viz_library_types/__init__.py +++ /dev/null @@ -1,66 +0,0 @@ -import logging -from typing import Optional, Union - -from .. 
import Logger -from ._viz_library_types import ( - MatplotlibVizLibraryType, - NoVizLibraryType, - PlotlyVizLibraryType, - SeabornVizLibraryType, -) -from .base import VisualizationLibrary - -viz_lib_map = { - VisualizationLibrary.MATPLOTLIB.value: MatplotlibVizLibraryType, - VisualizationLibrary.PLOTLY.value: PlotlyVizLibraryType, - VisualizationLibrary.SEABORN.value: SeabornVizLibraryType, -} - - -def viz_lib_type_factory( - viz_lib_type: str = None, logger: Optional[Logger] = None -) -> Union[ - MatplotlibVizLibraryType, - PlotlyVizLibraryType, - SeabornVizLibraryType, -]: - """ - Factory function to get appropriate instance for viz library type. - - Uses `viz_library_types_map` to determine the viz library type class. - - Args: - viz_lib_type (Optional[str]): A name of the viz library type. - Defaults to None, an instance of `DefaultVizLibraryType` will be - returned. - logger (Optional[str]): If passed, collects logs about correctness - of the `viz_library_type` argument and what kind of VizLibraryType - is created. - - Returns: - (Union[ - MatplotlibVizLibraryType, - PlotlyVizLibraryType, - SeabornVizLibraryType, - DefaultVizLibraryType - ]): An instance of the output type. - """ - - if viz_lib_type is not None and viz_lib_type not in viz_lib_map and logger: - possible_types_msg = ", ".join(f"'{type_}'" for type_ in viz_lib_map) - logger.log( - f"Unknown value for the parameter `viz_library_type`: '{viz_lib_type}'." 
- f"Possible values are: {possible_types_msg} and None for default " - f"viz library type (miscellaneous).", - level=logging.WARNING, - ) - - viz_lib_default = NoVizLibraryType - viz_lib_type_helper = viz_lib_map.get(viz_lib_type, viz_lib_default)() - - if logger: - logger.log( - f"{viz_lib_type_helper.__class__} is going to be used.", level=logging.DEBUG - ) - - return viz_lib_type_helper diff --git a/pandasai/helpers/viz_library_types/_viz_library_types.py b/pandasai/helpers/viz_library_types/_viz_library_types.py deleted file mode 100644 index f00ca2bae..000000000 --- a/pandasai/helpers/viz_library_types/_viz_library_types.py +++ /dev/null @@ -1,75 +0,0 @@ -from abc import ABC, abstractmethod -from typing import Any, Iterable - -from pandasai.prompts.generate_python_code import VizLibraryPrompt - - -class BaseVizLibraryType(ABC): - @property - def template_hint(self) -> str: - return VizLibraryPrompt(library=self.name) - - @property - @abstractmethod - def name(self) -> str: - ... - - def _validate_type(self, actual_type: str) -> bool: - return actual_type == self.name - - def validate(self, result: dict[str, Any]) -> tuple[bool, Iterable[str]]: - """ - Validate 'type' and 'constraint' from the result dict. - - Args: - result (dict[str, Any]): The result of code execution in - dict representation. Should have the following schema: - { - "viz_library_type": - } - - Returns: - (tuple(bool, Iterable(str)): - Boolean value whether the result matches output type - and collection of logs containing messages about - 'type' mismatches. - """ - validation_logs = [] - actual_type = result.get("type") - - type_ok = self._validate_type(actual_type) - if not type_ok: - validation_logs.append( - f"The result dict contains inappropriate 'type'. " - f"Expected '{self.name}', actual '{actual_type}'." 
- ) - - return type_ok, validation_logs - - -class NoVizLibraryType(BaseVizLibraryType): - @property - def template_hint(self) -> str: - return "" - - @property - def name(self): - return "no_viz_library" - - -class MatplotlibVizLibraryType(BaseVizLibraryType): - @property - def name(self): - return "matplotlib" - - -class PlotlyVizLibraryType(BaseVizLibraryType): - @property - def name(self): - return "plotly" - - -class SeabornVizLibraryType(BaseVizLibraryType): - @property - def name(self): - return "seaborn" diff --git a/pandasai/helpers/viz_library_types/base.py b/pandasai/helpers/viz_library_types/base.py deleted file mode 100644 index e53e4fa4c..000000000 --- a/pandasai/helpers/viz_library_types/base.py +++ /dev/null @@ -1,19 +0,0 @@ -from enum import Enum - - -class VisualizationLibrary(str, Enum): - """ - VisualizationLibrary is an enumeration that represents the available - data visualization libraries. - - Attributes: - MATPLOTLIB (str): Represents the Matplotlib library. - SEABORN (str): Represents the Seaborn library. - PLOTLY (str): Represents the Plotly library. 
- """ - - MATPLOTLIB = "matplotlib" - SEABORN = "seaborn" - PLOTLY = "plotly" - - DEFAULT = "default" diff --git a/pandasai/llm/__init__.py b/pandasai/llm/__init__.py index 2c697e37b..95cafe46d 100644 --- a/pandasai/llm/__init__.py +++ b/pandasai/llm/__init__.py @@ -1,17 +1,13 @@ from .azure_openai import AzureOpenAI -from .base import LLM, HuggingFaceLLM -from .falcon import Falcon -from .google_gemini import GoogleGemini +from .base import LLM from .google_palm import GooglePalm from .google_vertexai import GoogleVertexAI from .huggingface_text_gen import HuggingFaceTextGen from .langchain import LangchainLLM from .openai import OpenAI -from .starcoder import Starcoder __all__ = [ "LLM", - "HuggingFaceLLM", "AzureOpenAI", "OpenAI", "Falcon", @@ -20,5 +16,4 @@ "GoogleVertexAI", "HuggingFaceTextGen", "LangchainLLM", - "Starcoder", ] diff --git a/pandasai/llm/bamboo_llm.py b/pandasai/llm/bamboo_llm.py new file mode 100644 index 000000000..a17986937 --- /dev/null +++ b/pandasai/llm/bamboo_llm.py @@ -0,0 +1,24 @@ +from typing import Optional + +from pandasai.helpers.request import Session +from pandasai.llm.base import LLM +from pandasai.pipelines.pipeline_context import PipelineContext +from pandasai.prompts.base import BasePrompt + + +class BambooLLM(LLM): + _session: Session + + def __init__( + self, endpoint_url: Optional[str] = None, api_key: Optional[str] = None + ): + self._session = Session(endpoint_url=endpoint_url, api_key=api_key) + + def call(self, instruction: BasePrompt, context: PipelineContext = None) -> str: + data = instruction.to_json() + response = self._session.post("/llm/chat", json=data) + return response["data"] + + @property + def type(self) -> str: + return "bamboo_llm" diff --git a/pandasai/llm/base.py b/pandasai/llm/base.py index be6c2457b..7e2f5f920 100644 --- a/pandasai/llm/base.py +++ b/pandasai/llm/base.py @@ -1,7 +1,7 @@ """ Base class to implement a new LLM This module is the base class to integrate the various LLMs API. 
This module also -includes the Base LLM classes for OpenAI, HuggingFace and Google PaLM. +includes the Base LLM classes for OpenAI and Google PaLM. Example: @@ -13,24 +13,27 @@ class CustomLLM(BaseOpenAI): Custom Class Starts here!! ``` """ +from __future__ import annotations import ast -import os import re from abc import abstractmethod -from typing import Any, Dict, Mapping, Optional, Tuple, Union +from typing import TYPE_CHECKING, Any, Dict, Mapping, Optional, Tuple, Union -import requests +from pandasai.helpers.memory import Memory +from pandasai.prompts.generate_system_message import GenerateSystemMessagePrompt from ..exceptions import ( APIKeyNotFoundError, - LLMResponseHTTPError, MethodNotImplementedError, NoCodeFoundError, ) from ..helpers.openai import is_openai_v1 from ..helpers.openai_info import openai_callback_var -from ..prompts.base import AbstractPrompt +from ..prompts.base import BasePrompt + +if TYPE_CHECKING: + from pandasai.pipelines.pipeline_context import PipelineContext class LLM: @@ -120,6 +123,30 @@ def _extract_code(self, response: str, separator: str = "```") -> str: return code + def prepend_system_prompt(self, prompt: BasePrompt, memory: Memory): + """ + Append system prompt to the chat prompt, useful when model doesn't have messages for chat history + Args: + prompt (BasePrompt): prompt for chat method + memory (Memory): user conversation history + """ + return self.get_system_prompt(memory) + prompt if memory else prompt + + def get_system_prompt(self, memory: Memory) -> Any: + """ + Generate system prompt with agent info and previous conversations + """ + system_prompt = GenerateSystemMessagePrompt(memory=memory) + return system_prompt.to_string() + + def get_messages(self, memory: Memory) -> Any: + """ + Return formatted messages + Args: + memory (Memory): Get past Conversation from memory + """ + return memory.get_previous_conversation() + def _extract_tag_text(self, response: str, tag: str) -> str: """ Extracts the text between 
two tags in the response. @@ -141,13 +168,13 @@ def _extract_tag_text(self, response: str, tag: str) -> str: return None @abstractmethod - def call(self, instruction: AbstractPrompt, suffix: str = "") -> str: + def call(self, instruction: BasePrompt, context: PipelineContext = None) -> str: """ Execute the LLM with given prompt. Args: - instruction (AbstractPrompt): A prompt object with instruction for LLM. - suffix (str, optional): Suffix. Defaults to "". + instruction (BasePrompt): A prompt object with instruction for LLM. + context (PipelineContext, optional): PipelineContext. Defaults to None. Raises: MethodNotImplementedError: Call method has not been implemented @@ -155,18 +182,18 @@ def call(self, instruction: AbstractPrompt, suffix: str = "") -> str: """ raise MethodNotImplementedError("Call method has not been implemented") - def generate_code(self, instruction: AbstractPrompt) -> str: + def generate_code(self, instruction: BasePrompt, context: PipelineContext) -> str: """ Generate the code based on the instruction and the given prompt. Args: - instruction (AbstractPrompt): Prompt with instruction for LLM. + instruction (BasePrompt): Prompt with instruction for LLM. Returns: str: A string of Python code. 
""" - response = self.call(instruction, suffix="") + response = self.call(instruction, context) return self._extract_code(response) @@ -255,12 +282,10 @@ def _invocation_params(self) -> Dict[str, Any]: """Get the parameters used to invoke the model.""" openai_creds: Dict[str, Any] = {} if not is_openai_v1(): - openai_creds.update( - { - "api_key": self.api_token, - "api_base": self.api_base, - } - ) + openai_creds |= { + "api_key": self.api_token, + "api_base": self.api_base, + } return {**openai_creds, **self._default_params} @@ -275,7 +300,7 @@ def _client_params(self) -> Dict[str, any]: "http_client": self.http_client, } - def completion(self, prompt: str) -> str: + def completion(self, prompt: str, memory: Memory) -> str: """ Query the completion API @@ -286,6 +311,8 @@ def completion(self, prompt: str) -> str: str: LLM response. """ + prompt = self.prepend_system_prompt(prompt, memory) + params = {**self._invocation_params, "prompt": prompt} if self.stop is not None: @@ -296,9 +323,11 @@ def completion(self, prompt: str) -> str: if openai_handler := openai_callback_var.get(): openai_handler(response) + self.last_prompt = prompt + return response.choices[0].text - def chat_completion(self, value: str) -> str: + def chat_completion(self, value: str, memory: Memory) -> str: """ Query the chat completion API @@ -309,14 +338,35 @@ def chat_completion(self, value: str) -> str: str: LLM response. 
""" + messages = [] + if memory: + if memory.agent_info: + messages.append( + { + "role": "system", + "content": memory.get_system_prompt(), + } + ) + + for message in memory.all(): + if message["is_user"]: + messages.append({"role": "user", "content": message["message"]}) + else: + messages.append( + {"role": "assistant", "content": message["message"]} + ) + + # adding current prompt as latest query message + messages.append( + { + "role": "user", + "content": value, + }, + ) + params = { **self._invocation_params, - "messages": [ - { - "role": "system", - "content": value, - } - ], + "messages": messages, } if self.stop is not None: @@ -329,13 +379,13 @@ def chat_completion(self, value: str) -> str: return response.choices[0].message.content - def call(self, instruction: AbstractPrompt, suffix: str = ""): + def call(self, instruction: BasePrompt, context: PipelineContext = None): """ Call the OpenAI LLM. Args: - instruction (AbstractPrompt): A prompt object with instruction for LLM. - suffix (str): Suffix to pass. + instruction (BasePrompt): A prompt object with instruction for LLM. + context (PipelineContext): context to pass. Raises: UnsupportedModelError: Unsupported model @@ -343,127 +393,16 @@ def call(self, instruction: AbstractPrompt, suffix: str = ""): Returns: str: Response """ - self.last_prompt = instruction.to_string() + suffix + self.last_prompt = instruction.to_string() + + memory = context.memory if context else None return ( - self.chat_completion(self.last_prompt) + self.chat_completion(self.last_prompt, memory) if self._is_chat_model - else self.completion(self.last_prompt) - ) - - -class HuggingFaceLLM(LLM): - """Base class to implement a new Hugging Face LLM. - - LLM base class is extended to be used with HuggingFace LLM Modes APIs. 
- - """ - - last_prompt: Optional[str] = None - api_token: str - _api_url: str = "https://api-inference.huggingface.co/models/" - _max_retries: int = 3 - - @property - def type(self) -> str: - return "huggingface-llm" - - def _setup(self, **kwargs): - """ - Setup the HuggingFace LLM - - Args: - **kwargs: ["api_token", "max_retries"] - - """ - self.api_token = ( - kwargs.get("api_token") or os.getenv("HUGGINGFACE_API_KEY") or None - ) - if self.api_token is None: - raise APIKeyNotFoundError("HuggingFace Hub API key is required") - - # Since the huggingface API only returns few tokens at a time, we need to - # call the API multiple times to get all the tokens. This is the maximum - # number of retries we will do. - if kwargs.get("max_retries"): - self._max_retries = kwargs.get("max_retries") - - def __init__(self, **kwargs): - """ - __init__ method of HuggingFaceLLM Class - - Args: - **kwargs: ["api_token", "max_retries"] - - """ - self._setup(**kwargs) - - def query(self, payload) -> str: - """ - Query the HF API - Args: - payload: A JSON form payload - - Returns: - str: Value of the field "generated_text" in response JSON - given by the remote server. - - Raises: - LLMResponseHTTPError: If api-inference.huggingface.co responses - with any error HTTP code (>= 400). - - """ - - headers = {"Authorization": f"Bearer {self.api_token}"} - - response = requests.post( - self._api_url, headers=headers, json=payload, timeout=60 + else self.completion(self.last_prompt, memory) ) - if response.status_code >= 400: - try: - error_msg = response.json().get("error") - except (requests.exceptions.JSONDecodeError, TypeError): - error_msg = None - - raise LLMResponseHTTPError( - status_code=response.status_code, error_msg=error_msg - ) - - return response.json()[0]["generated_text"] - - def call(self, instruction: AbstractPrompt, suffix: str = "") -> str: - """ - A call method of HuggingFaceLLM class. - Args: - instruction (AbstractPrompt): A prompt object with instruction for LLM. 
- suffix (str): A string representing the suffix to be truncated - from the generated response. - - Returns - str: LLM response. - - """ - - prompt = instruction.to_string() - payload = prompt + suffix - - # sometimes the API doesn't return a valid response, so we retry passing the - # output generated from the previous call as the input - for _i in range(self._max_retries): - response = self.query({"inputs": payload}) - payload = response - - match = re.search( - "(```python)(.*)(```)", - response.replace(prompt + suffix, ""), - re.DOTALL | re.MULTILINE, - ) - if match: - break - - return response.replace(prompt + suffix, "") - class BaseGoogle(LLM): """Base class to implement a new Google LLM @@ -514,7 +453,7 @@ def _validate(self): raise ValueError("max_output_tokens must be greater than zero") @abstractmethod - def _generate_text(self, prompt: str) -> str: + def _generate_text(self, prompt: str, memory: Memory) -> str: """ Generates text for prompt, specific to implementation. @@ -527,17 +466,18 @@ def _generate_text(self, prompt: str) -> str: """ raise MethodNotImplementedError("method has not been implemented") - def call(self, instruction: AbstractPrompt, suffix: str = "") -> str: + def call(self, instruction: BasePrompt, context: PipelineContext = None) -> str: """ Call the Google LLM. Args: - instruction (AbstractPrompt): Instruction to pass. - suffix (str): Suffix to pass. Defaults to an empty string (""). + instruction (BasePrompt): Instruction to pass. + context (PipelineContext): Pass PipelineContext. Returns: str: LLM response. 
""" - self.last_prompt = instruction.to_string() + suffix - return self._generate_text(self.last_prompt) + self.last_prompt = instruction.to_string() + memory = context.memory if context else None + return self._generate_text(self.last_prompt, memory) diff --git a/pandasai/llm/fake.py b/pandasai/llm/fake.py index 8d95bef1d..c93fcf174 100644 --- a/pandasai/llm/fake.py +++ b/pandasai/llm/fake.py @@ -2,7 +2,9 @@ from typing import Optional -from ..prompts.base import AbstractPrompt +from pandasai.pipelines.pipeline_context import PipelineContext + +from ..prompts.base import BasePrompt from .base import LLM @@ -15,8 +17,8 @@ def __init__(self, output: Optional[str] = None): if output is not None: self._output = output - def call(self, instruction: AbstractPrompt, suffix: str = "") -> str: - self.last_prompt = instruction.to_string() + suffix + def call(self, instruction: BasePrompt, context: PipelineContext = None) -> str: + self.last_prompt = instruction.to_string() return self._output @property diff --git a/pandasai/llm/falcon.py b/pandasai/llm/falcon.py deleted file mode 100644 index 5b50498a8..000000000 --- a/pandasai/llm/falcon.py +++ /dev/null @@ -1,38 +0,0 @@ -""" Falcon LLM -This module is to run the Falcon API hosted and maintained by HuggingFace.co. -To generate HF_TOKEN go to https://huggingface.co/settings/tokens after creating Account -on the platform. - -Example: - Use below example to call Falcon Model - - >>> from pandasai.llm.falcon import Falcon -""" -import warnings - -from ..helpers import load_dotenv -from .base import HuggingFaceLLM - -load_dotenv() - - -class Falcon(HuggingFaceLLM): - """Falcon LLM API (Deprecated: Kept for backwards compatibility)""" - - api_token: str - _api_url: str = ( - "https://api-inference.huggingface.co/models/tiiuae/falcon-7b-instruct" - ) - _max_retries: int = 30 - - def __init__(self, **kwargs): - warnings.warn( - """Falcon has been deprecated as of version 1.5. 
- Please choose a different LLM instead from the ones listed in - https://docs.pandas-ai.com/en/latest/API/llms/ - """ - ) - - @property - def type(self) -> str: - return "falcon" diff --git a/pandasai/llm/google_palm.py b/pandasai/llm/google_palm.py index 2f8e49abf..d21d68617 100644 --- a/pandasai/llm/google_palm.py +++ b/pandasai/llm/google_palm.py @@ -11,6 +11,8 @@ """ from typing import Any +from pandasai.helpers.memory import Memory + from ..exceptions import APIKeyNotFoundError from ..helpers.optional import import_dependency from .base import BaseGoogle @@ -70,7 +72,7 @@ def _validate(self): if not self.model: raise ValueError("model is required.") - def _generate_text(self, prompt: str) -> str: + def _generate_text(self, prompt: str, memory: Memory = None) -> str: """ Generates text for prompt. @@ -82,6 +84,7 @@ def _generate_text(self, prompt: str) -> str: """ self._validate() + prompt = self.prepend_system_prompt(prompt, memory) completion = self.google_palm.generate_text( model=self.model, prompt=prompt, @@ -90,6 +93,7 @@ def _generate_text(self, prompt: str) -> str: top_k=self.top_k, max_output_tokens=self.max_output_tokens, ) + self.last_prompt = prompt return completion.result @property diff --git a/pandasai/llm/google_vertexai.py b/pandasai/llm/google_vertexai.py index 1bee3f7e9..60b190b29 100644 --- a/pandasai/llm/google_vertexai.py +++ b/pandasai/llm/google_vertexai.py @@ -12,6 +12,8 @@ """ from typing import Optional +from pandasai.helpers.memory import Memory + from ..exceptions import UnsupportedModelError from ..helpers.optional import import_dependency from .base import BaseGoogle @@ -42,6 +44,7 @@ class GoogleVertexAI(BaseGoogle): _supported_generative_models = [ "gemini-pro", ] + _supported_code_chat_models = ["codechat-bison@001", "codechat-bison@002"] def __init__( self, project_id: str, location: str, model: Optional[str] = None, **kwargs @@ -96,7 +99,7 @@ def _validate(self): if not self.model: raise ValueError("model is required.") - 
def _generate_text(self, prompt: str) -> str: + def _generate_text(self, prompt: str, memory: Memory = None) -> str: """ Generates text for prompt. @@ -109,13 +112,13 @@ def _generate_text(self, prompt: str) -> str: """ self._validate() - from vertexai.preview.generative_models import GenerativeModel - from vertexai.preview.language_models import ( - CodeGenerationModel, - TextGenerationModel, - ) + updated_prompt = self.prepend_system_prompt(prompt, memory) + + self.last_prompt = updated_prompt if self.model in self._supported_code_models: + from vertexai.preview.language_models import CodeGenerationModel + code_generation = CodeGenerationModel.from_pretrained(self.model) completion = code_generation.predict( @@ -124,19 +127,24 @@ def _generate_text(self, prompt: str) -> str: max_output_tokens=self.max_output_tokens, ) elif self.model in self._supported_text_models: + from vertexai.preview.language_models import TextGenerationModel + text_generation = TextGenerationModel.from_pretrained(self.model) completion = text_generation.predict( - prompt=prompt, + prompt=updated_prompt, temperature=self.temperature, top_p=self.top_p, top_k=self.top_k, max_output_tokens=self.max_output_tokens, ) elif self.model in self._supported_generative_models: + from vertexai.preview.generative_models import GenerativeModel + model = GenerativeModel(self.model) + responses = model.generate_content( - [prompt], + [updated_prompt], generation_config={ "max_output_tokens": self.max_output_tokens, "temperature": self.temperature, @@ -146,6 +154,28 @@ def _generate_text(self, prompt: str) -> str: ) completion = responses.candidates[0].content.parts[0] + elif self.model in self._supported_code_chat_models: + from vertexai.language_models import ChatMessage, CodeChatModel + + code_chat_model = CodeChatModel.from_pretrained(self.model) + messages = [] + + for message in memory.all(): + if message["is_user"]: + messages.append( + ChatMessage(author="user", content=message["message"]) + ) + else: 
+ messages.append( + ChatMessage(author="model", content=message["message"]) + ) + chat = code_chat_model.start_chat( + context=memory.get_system_prompt(), message_history=messages + ) + + response = chat.send_message(prompt) + return response.text + else: raise UnsupportedModelError(self.model) diff --git a/pandasai/llm/huggingface_text_gen.py b/pandasai/llm/huggingface_text_gen.py index 6439d566a..aa7162fc4 100644 --- a/pandasai/llm/huggingface_text_gen.py +++ b/pandasai/llm/huggingface_text_gen.py @@ -1,9 +1,15 @@ -from typing import Any, Dict, List, Optional +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, Dict, List, Optional from ..helpers import load_dotenv -from ..prompts.base import AbstractPrompt +from ..prompts.base import BasePrompt from .base import LLM +if TYPE_CHECKING: + from pandasai.pipelines.pipeline_context import PipelineContext + + load_dotenv() @@ -75,8 +81,12 @@ def _default_params(self) -> Dict[str, Any]: "seed": self.seed, } - def call(self, instruction: AbstractPrompt, suffix: str = "") -> str: - prompt = instruction.to_string() + suffix + def call(self, instruction: BasePrompt, context: PipelineContext = None) -> str: + prompt = instruction.to_string() + + memory = context.memory if context else None + + prompt = self.prepend_system_prompt(prompt, memory) params = self._default_params if self.streaming: @@ -92,6 +102,7 @@ def call(self, instruction: AbstractPrompt, suffix: str = "") -> str: res.generated_text = res.generated_text[ : res.generated_text.index(stop_seq) ] + self.last_prompt = prompt return res.generated_text @property diff --git a/pandasai/llm/langchain.py b/pandasai/llm/langchain.py index 9e2f8f31a..73ca127a8 100644 --- a/pandasai/llm/langchain.py +++ b/pandasai/llm/langchain.py @@ -1,3 +1,5 @@ +from __future__ import annotations + try: from langchain_core.language_models.chat_models import BaseChatModel from langchain_core.language_models.llms import BaseLLM @@ -8,12 +10,15 @@ BaseLLM = 
BaseChatModel = object LANGCHAIN_AVAILABLE = False -from typing import Union +from typing import TYPE_CHECKING, Union -from pandasai.prompts.base import AbstractPrompt +from pandasai.prompts.base import BasePrompt from .base import LLM +if TYPE_CHECKING: + from pandasai.pipelines.pipeline_context import PipelineContext + """Langchain LLM This module is to run LLM using LangChain framework. @@ -30,17 +35,26 @@ class LangchainLLM(LLM): with LangChain. """ + langchain_llm = None + def __init__(self, langchain_llm: Union[BaseLLM, BaseChatModel]): - self._langchain_llm = langchain_llm + self.langchain_llm = langchain_llm - def call(self, instruction: AbstractPrompt, suffix: str = "") -> str: + def call( + self, instruction: BasePrompt, context: PipelineContext = None, suffix: str = "" + ) -> str: prompt = instruction.to_string() + suffix - res = self._langchain_llm.invoke(prompt) - if isinstance(self._langchain_llm, BaseChatModel): - return res.content + memory = context.memory if context else None + prompt = self.prepend_system_prompt(prompt, memory) + self.last_prompt = prompt + + res = self.langchain_llm.invoke(prompt) + return res.content if isinstance(self.langchain_llm, BaseChatModel) else res - return res + @staticmethod + def is_langchain_llm(llm: LLM) -> bool: + return hasattr(llm, "_llm_type") @property def type(self) -> str: - return f"langchain_{self._langchain_llm._llm_type}" + return f"langchain_{self.langchain_llm._llm_type}" diff --git a/pandasai/llm/starcoder.py b/pandasai/llm/starcoder.py deleted file mode 100644 index 79e5673bf..000000000 --- a/pandasai/llm/starcoder.py +++ /dev/null @@ -1,37 +0,0 @@ -""" Starcoder LLM -This module is to run the StartCoder API hosted and maintained by HuggingFace.co. -To generate HF_TOKEN go to https://huggingface.co/settings/tokens after creating Account -on the platform. 
- -Example: - Use below example to call Starcoder Model - - >>> from pandasai.llm.starcoder import Starcoder -""" -import warnings - -from ..helpers import load_dotenv -from .base import HuggingFaceLLM - -load_dotenv() - - -class Starcoder(HuggingFaceLLM): - - """Starcoder LLM API (Deprecated: Kept for backwards compatibility)""" - - api_token: str - _api_url: str = "https://api-inference.huggingface.co/models/bigcode/starcoder" - _max_retries: int = 30 - - def __init__(self, **kwargs): - warnings.warn( - """Starcoder has been deprecated as of version 1.5. - Please choose a different LLM instead from the ones listed in - https://docs.pandas-ai.com/en/latest/API/llms/ - """ - ) - - @property - def type(self) -> str: - return "starcoder" diff --git a/pandasai/pipelines/__init__.py b/pandasai/pipelines/__init__.py index ef7b5ed2f..0e748a585 100644 --- a/pandasai/pipelines/__init__.py +++ b/pandasai/pipelines/__init__.py @@ -1,6 +1,5 @@ from .abstract_pipeline import AbstractPipeline from .base_logic_unit import BaseLogicUnit from .pipeline import Pipeline -from .synthetic_dataframe.generate_sdf_pipeline import GenerateSDFPipeline __all__ = ["Pipeline", "AbstractPipeline", "BaseLogicUnit", "GenerateSDFPipeline"] diff --git a/pandasai/pipelines/base_logic_unit.py b/pandasai/pipelines/base_logic_unit.py index 221cabe15..93705c07a 100644 --- a/pandasai/pipelines/base_logic_unit.py +++ b/pandasai/pipelines/base_logic_unit.py @@ -1,20 +1,22 @@ from abc import ABC, abstractmethod from typing import Any +from pandasai.pipelines.logic_unit_output import LogicUnitOutput + class BaseLogicUnit(ABC): """ Logic units for pipeline each logic unit should be inherited from this Logic unit """ - _skip_if: callable - - def __init__(self, skip_if=None): + def __init__(self, skip_if=None, on_execution=None, before_execution=None): super().__init__() - self._skip_if = skip_if + self.skip_if = skip_if + self.on_execution = on_execution + self.before_execution = before_execution 
@abstractmethod - def execute(self, input: Any, **kwargs) -> Any: + def execute(self, input: Any, **kwargs) -> LogicUnitOutput: """ This method will return output according to Implementation. @@ -28,7 +30,3 @@ def execute(self, input: Any, **kwargs) -> Any: :return: The result of the execution. """ raise NotImplementedError("execute method is not implemented.") - - @property - def skip_if(self): - return self._skip_if diff --git a/pandasai/pipelines/smart_datalake_chat/cache_lookup.py b/pandasai/pipelines/chat/cache_lookup.py similarity index 82% rename from pandasai/pipelines/smart_datalake_chat/cache_lookup.py rename to pandasai/pipelines/chat/cache_lookup.py index 06624ac74..2cf7cba0f 100644 --- a/pandasai/pipelines/smart_datalake_chat/cache_lookup.py +++ b/pandasai/pipelines/chat/cache_lookup.py @@ -1,5 +1,7 @@ from typing import Any +from pandasai.pipelines.logic_unit_output import LogicUnitOutput + from ...helpers.logger import Logger from ..base_logic_unit import BaseLogicUnit from ..pipeline_context import PipelineContext @@ -35,10 +37,11 @@ def execute(self, input: Any, **kwargs) -> Any: ) ): logger.log("Using cached response") - code = pipeline_context.query_exec_tracker.execute_func( - pipeline_context.cache.get, - pipeline_context.cache.get_cache_key(pipeline_context), - tag="cache_hit", + + code = pipeline_context.cache.get( + pipeline_context.cache.get_cache_key(pipeline_context) ) - pipeline_context.add_intermediate_value("is_present_in_cache", True) - return code + + pipeline_context.add("found_in_cache", True) + + return LogicUnitOutput(code, True, "Cache Hit") diff --git a/pandasai/pipelines/smart_datalake_chat/cache_population.py b/pandasai/pipelines/chat/cache_population.py similarity index 78% rename from pandasai/pipelines/smart_datalake_chat/cache_population.py rename to pandasai/pipelines/chat/cache_population.py index 166448cb6..6393eddf1 100644 --- a/pandasai/pipelines/smart_datalake_chat/cache_population.py +++ 
b/pandasai/pipelines/chat/cache_population.py @@ -1,5 +1,7 @@ from typing import Any +from pandasai.pipelines.logic_unit_output import LogicUnitOutput + from ..base_logic_unit import BaseLogicUnit from ..pipeline_context import PipelineContext @@ -33,4 +35,10 @@ def execute(self, input: Any, **kwargs) -> Any: pipeline_context.cache.get_cache_key(pipeline_context), code ) - return code + return LogicUnitOutput( + code, + True, + "Prompt Cached Successfully" + if pipeline_context.config.enable_cache + else "Caching disabled", + ) diff --git a/pandasai/pipelines/chat/chat_pipeline_input.py b/pandasai/pipelines/chat/chat_pipeline_input.py new file mode 100644 index 000000000..aa9541b75 --- /dev/null +++ b/pandasai/pipelines/chat/chat_pipeline_input.py @@ -0,0 +1,27 @@ +import uuid +from dataclasses import dataclass + + +@dataclass +class ChatPipelineInput: + """ + Contain all the data needed by the chat pipeline + """ + + query: str + output_type: str + instance: str + conversation_id: uuid.UUID + prompt_id: uuid.UUID + + def __init__( + self, + query: str, + output_type: str, + conversation_id: uuid.UUID, + prompt_id: uuid.UUID, + ) -> None: + self.query = query + self.output_type = output_type + self.conversation_id = conversation_id + self.prompt_id = prompt_id diff --git a/pandasai/pipelines/chat/code_execution.py b/pandasai/pipelines/chat/code_execution.py new file mode 100644 index 000000000..8c8a91559 --- /dev/null +++ b/pandasai/pipelines/chat/code_execution.py @@ -0,0 +1,133 @@ +import logging +import traceback +from typing import Any, Callable + +from pandasai.exceptions import InvalidLLMOutputType +from pandasai.pipelines.logic_unit_output import LogicUnitOutput +from pandasai.responses.response_serializer import ResponseSerializer + +from ...helpers.code_manager import CodeExecutionContext, CodeManager +from ...helpers.logger import Logger +from ...helpers.output_validator import OutputValidator +from ..base_logic_unit import BaseLogicUnit +from 
..pipeline_context import PipelineContext + + +class CodeExecution(BaseLogicUnit): + """ + Code Execution Stage + """ + + def __init__( + self, + on_failure: Callable[[str, Exception], None] = None, + on_retry: Callable[[str, Exception], None] = None, + **kwargs, + ): + super().__init__() + self.on_failure = on_failure + self.on_retry = on_retry + + def execute(self, input: Any, **kwargs) -> Any: + """ + This method will return output according to + Implementation. + + :param input: Your input data. + :param kwargs: A dictionary of keyword arguments. + - 'logger' (any): The logger for logging. + - 'config' (Config): Global configurations for the test + - 'context' (any): The execution context. + + :return: The result of the execution. + """ + self.context: PipelineContext = kwargs.get("context") + self.logger: Logger = kwargs.get("logger") + + # Execute the code + code_context = CodeExecutionContext( + self.context.get("last_prompt_id"), self.context.skills_manager + ) + + code_manager = CodeManager( + dfs=self.context.dfs, + config=self.context.config, + logger=self.logger, + ) + + # code = input.input + retry_count = 0 + code_to_run = input + result = None + while retry_count <= self.context.config.max_retries: + try: + result = code_manager.execute_code(code_to_run, code_context) + + if self.context.get("output_type") != "" and ( + output_helper := self.context.get("output_type") + ): + (validation_ok, validation_errors) = OutputValidator.validate( + output_helper, result + ) + + if not validation_ok: + raise InvalidLLMOutputType(validation_errors) + break + + except Exception as e: + traceback_errors = traceback.format_exc() + self.logger.log(f"Failed with error: {traceback_errors}", logging.ERROR) + if self.on_failure: + self.on_failure(code_to_run, traceback_errors) + + if ( + not self.context.config.use_error_correction_framework + or retry_count >= self.context.config.max_retries + ): + raise e + + retry_count += 1 + + self.logger.log( + f"Failed to execute 
code retrying with a correction framework " + f"[retry number: {retry_count}]", + level=logging.WARNING, + ) + + # TODO - Move this implement to main execute function + # Temporarily done for test cases this is to be fixed move to the main function + code_to_run = self._retry_run_code( + code_to_run, self.context, self.logger, e + ) + + return LogicUnitOutput( + result, + True, + "Code Executed Successfully", + {"content_type": "response", "value": ResponseSerializer.serialize(result)}, + final_track_output=True, + ) + + def _retry_run_code( + self, + code: str, + context: PipelineContext, + logger: Logger, + e: Exception, + ) -> str: + """ + A method to retry the code execution with error correction framework. + + Args: + code (str): A python code + context (PipelineContext) : Pipeline Context + logger (Logger) : Logger + e (Exception): An exception + dataframes + + Returns (str): A python code + """ + if self.on_retry: + return self.on_retry(code, e) + else: + raise e diff --git a/pandasai/pipelines/smart_datalake_chat/code_generator.py b/pandasai/pipelines/chat/code_generator.py similarity index 66% rename from pandasai/pipelines/smart_datalake_chat/code_generator.py rename to pandasai/pipelines/chat/code_generator.py index de73b565d..5f82c6a6d 100644 --- a/pandasai/pipelines/smart_datalake_chat/code_generator.py +++ b/pandasai/pipelines/chat/code_generator.py @@ -1,5 +1,7 @@ from typing import Any +from pandasai.pipelines.logic_unit_output import LogicUnitOutput + from ...helpers.logger import Logger from ..base_logic_unit import BaseLogicUnit from ..pipeline_context import PipelineContext @@ -28,13 +30,14 @@ def execute(self, input: Any, **kwargs) -> Any: pipeline_context: PipelineContext = kwargs.get("context") logger: Logger = kwargs.get("logger") - generate_python_code_instruction = input + code = pipeline_context.config.llm.generate_code(input, pipeline_context) - code = pipeline_context.query_exec_tracker.execute_func( - 
pipeline_context.config.llm.generate_code, - generate_python_code_instruction, + pipeline_context.add("last_code_generated", code) + logger.log( + f"""Prompt used: + {pipeline_context.config.llm.last_prompt} + """ ) - pipeline_context.add_intermediate_value("last_code_generated", code) logger.log( f"""Code generated: ``` @@ -43,4 +46,9 @@ def execute(self, input: Any, **kwargs) -> Any: """ ) - return code + return LogicUnitOutput( + code, + True, + "Code Generated Successfully", + {"content_type": "code", "value": code}, + ) diff --git a/pandasai/pipelines/chat/error_correction_pipeline/error_correction_pipeline.py b/pandasai/pipelines/chat/error_correction_pipeline/error_correction_pipeline.py new file mode 100644 index 000000000..f88d4a0fd --- /dev/null +++ b/pandasai/pipelines/chat/error_correction_pipeline/error_correction_pipeline.py @@ -0,0 +1,45 @@ +from typing import Optional + +from pandasai.helpers.logger import Logger +from pandasai.helpers.query_exec_tracker import QueryExecTracker +from pandasai.pipelines.chat.code_generator import CodeGenerator +from pandasai.pipelines.chat.error_correction_pipeline.error_correction_pipeline_input import ( + ErrorCorrectionPipelineInput, +) +from pandasai.pipelines.chat.error_correction_pipeline.error_prompt_generation import ( + ErrorPromptGeneration, +) +from pandasai.pipelines.pipeline import Pipeline +from pandasai.pipelines.pipeline_context import PipelineContext + + +class ErrorCorrectionPipeline: + """ + Error Correction Pipeline to regenerate prompt and code + """ + + _context: PipelineContext + _logger: Logger + + def __init__( + self, + context: Optional[PipelineContext] = None, + logger: Optional[Logger] = None, + query_exec_tracker: QueryExecTracker = None, + on_prompt_generation=None, + ): + self.pipeline = Pipeline( + context=context, + logger=logger, + query_exec_tracker=query_exec_tracker, + steps=[ + ErrorPromptGeneration(on_prompt_generation=on_prompt_generation), + CodeGenerator(), + ], + ) + 
self._context = context + self._logger = logger + + def run(self, input: ErrorCorrectionPipelineInput): + self._logger.log(f"Executing Pipeline: {self.__class__.__name__}") + return self.pipeline.run(input) diff --git a/pandasai/pipelines/chat/error_correction_pipeline/error_correction_pipeline_input.py b/pandasai/pipelines/chat/error_correction_pipeline/error_correction_pipeline_input.py new file mode 100644 index 000000000..6914f2ce3 --- /dev/null +++ b/pandasai/pipelines/chat/error_correction_pipeline/error_correction_pipeline_input.py @@ -0,0 +1,11 @@ +from dataclasses import dataclass + + +@dataclass +class ErrorCorrectionPipelineInput: + code: str + exception: Exception + + def __init__(self, code: str, exception: Exception): + self.code = code + self.exception = exception diff --git a/pandasai/pipelines/chat/error_correction_pipeline/error_prompt_generation.py b/pandasai/pipelines/chat/error_correction_pipeline/error_prompt_generation.py new file mode 100644 index 000000000..d622f30df --- /dev/null +++ b/pandasai/pipelines/chat/error_correction_pipeline/error_prompt_generation.py @@ -0,0 +1,94 @@ +from typing import Any, Callable + +from pandasai.exceptions import ExecuteSQLQueryNotUsed, InvalidLLMOutputType +from pandasai.helpers.logger import Logger +from pandasai.pipelines.base_logic_unit import BaseLogicUnit +from pandasai.pipelines.chat.error_correction_pipeline.error_correction_pipeline_input import ( + ErrorCorrectionPipelineInput, +) +from pandasai.pipelines.logic_unit_output import LogicUnitOutput +from pandasai.pipelines.pipeline_context import PipelineContext +from pandasai.prompts.base import BasePrompt +from pandasai.prompts.correct_error_prompt import CorrectErrorPrompt +from pandasai.prompts.correct_execute_sql_query_usage_error_prompt import ( + CorrectExecuteSQLQueryUsageErrorPrompt, +) +from pandasai.prompts.correct_output_type_error_prompt import ( + CorrectOutputTypeErrorPrompt, +) + + +class ErrorPromptGeneration(BaseLogicUnit): + 
on_prompt_generation: Callable[[str], None] + + def __init__( + self, + on_prompt_generation=None, + skip_if=None, + on_execution=None, + before_execution=None, + ): + self.on_prompt_generation = on_prompt_generation + super().__init__(skip_if, on_execution, before_execution) + + def execute(self, input: ErrorCorrectionPipelineInput, **kwargs) -> Any: + """ + A method to retry the code execution with error correction framework. + + Args: + code (str): A python code + context (PipelineContext) : Pipeline Context + logger (Logger) : Logger + e (Exception): An exception + dataframes + + Returns (str): A python code + """ + self.context: PipelineContext = kwargs.get("context") + self.logger: Logger = kwargs.get("logger") + e = input.exception + + prompt = self.get_prompt(e, input.code) + if self.on_prompt_generation: + self.on_prompt_generation(prompt) + + self.logger.log(f"Using prompt: {prompt}") + + return LogicUnitOutput( + prompt, + True, + "Prompt Generated Successfully", + { + "content_type": "prompt", + "value": prompt.to_string(), + }, + ) + + def get_prompt(self, e: Exception, code: str) -> BasePrompt: + """ + Return a prompt by key. 
+ + Args: + values (dict): The values to use for the prompt + + Returns: + BasePrompt: The prompt + """ + return ( + CorrectOutputTypeErrorPrompt( + context=self.context, + code=code, + error=e, + output_type=self.context.get("output_type"), + ) + if isinstance(e, InvalidLLMOutputType) + else CorrectExecuteSQLQueryUsageErrorPrompt( + context=self.context, code=code, error=e + ) + if isinstance(e, ExecuteSQLQueryNotUsed) + else CorrectErrorPrompt( + context=self.context, + code=code, + error=e, + ) + ) diff --git a/pandasai/pipelines/chat/generate_chat_pipeline.py b/pandasai/pipelines/chat/generate_chat_pipeline.py new file mode 100644 index 000000000..330204a90 --- /dev/null +++ b/pandasai/pipelines/chat/generate_chat_pipeline.py @@ -0,0 +1,177 @@ +from typing import Optional + +from pandasai.helpers.query_exec_tracker import QueryExecTracker +from pandasai.pipelines.chat.chat_pipeline_input import ( + ChatPipelineInput, +) +from pandasai.pipelines.chat.error_correction_pipeline.error_correction_pipeline import ( + ErrorCorrectionPipeline, +) +from pandasai.pipelines.chat.error_correction_pipeline.error_correction_pipeline_input import ( + ErrorCorrectionPipelineInput, +) +from pandasai.pipelines.chat.validate_pipeline_input import ( + ValidatePipelineInput, +) + +from ...helpers.logger import Logger +from ..pipeline import Pipeline +from ..pipeline_context import PipelineContext +from .cache_lookup import CacheLookup +from .cache_population import CachePopulation +from .code_execution import CodeExecution +from .code_generator import CodeGenerator +from .prompt_generation import PromptGeneration +from .result_parsing import ResultParsing +from .result_validation import ResultValidation + + +class GenerateChatPipeline: + pipeline: Pipeline + context: PipelineContext + _logger: Logger + last_error: str + + def __init__( + self, + context: Optional[PipelineContext] = None, + logger: Optional[Logger] = None, + on_prompt_generation=None, + on_code_generation=None, + 
on_code_execution=None, + on_result=None, + ): + self.query_exec_tracker = QueryExecTracker( + server_config=context.config.log_server + ) + + self.pipeline = Pipeline( + context=context, + logger=logger, + query_exec_tracker=self.query_exec_tracker, + steps=[ + ValidatePipelineInput(), + CacheLookup(), + PromptGeneration( + skip_if=self.is_cached, + on_execution=on_prompt_generation, + ), + CodeGenerator( + skip_if=self.is_cached, + on_execution=on_code_generation, + ), + CachePopulation(skip_if=self.is_cached), + CodeExecution( + before_execution=on_code_execution, + on_failure=self.on_code_execution_failure, + on_retry=self.on_code_retry, + ), + ResultValidation(), + ResultParsing( + before_execution=on_result, + ), + ], + ) + + self.code_exec_error_pipeline = ErrorCorrectionPipeline( + context=context, + logger=logger, + query_exec_tracker=self.query_exec_tracker, + on_prompt_generation=on_prompt_generation, + ) + + self.context = context + self._logger = logger + self.last_error = None + + def on_code_execution_failure(self, code: str, errors: Exception) -> str: + """ + Executes on code execution failure + Args: + code (str): code that is ran + exception (Exception): exception that is raised during code execution + + Returns: + str: returns the updated code with the fixes + """ + # Add information about the code failure in the query tracker for debug + self.query_exec_tracker.add_step( + { + "type": "CodeExecution", + "success": False, + "message": "Failed to execute code", + "execution_time": None, + "data": { + "content_type": "code", + "value": code, + "exception": errors, + }, + } + ) + + def on_code_retry(self, code: str, exception: Exception): + correction_input = ErrorCorrectionPipelineInput(code, exception) + return self.code_exec_error_pipeline.run(correction_input) + + def is_cached(self, context: PipelineContext): + return context.get("found_in_cache") + + def get_last_track_log_id(self): + return self.query_exec_tracker.last_log_id + + def 
run(self, input: ChatPipelineInput) -> dict: + """ + Executes the chat pipeline with user input and return the result + Args: + input (ChatPipelineInput): _description_ + + Returns: + The `output` dictionary is expected to have the following keys: + - 'type': The type of the output. + - 'value': The value of the output. + """ + self._logger.log(f"Executing Pipeline: {self.__class__.__name__}") + + # Reset intermediate values + self.context.reset_intermediate_values() + + # Start New Tracking for Query + self.query_exec_tracker.start_new_track(input) + + self.query_exec_tracker.add_skills(self.context) + + self.query_exec_tracker.add_dataframes(self.context.dfs) + + # Add Query to memory + self.context.memory.add(input.query, True) + + self.context.add_many( + { + "output_type": input.output_type, + "last_prompt_id": input.prompt_id, + } + ) + try: + output = self.pipeline.run(input) + + self.query_exec_tracker.success = True + + self.query_exec_tracker.publish() + + return output + + except Exception as e: + # Show the full traceback + import traceback + + traceback.print_exc() + + self.last_error = str(e) + self.query_exec_tracker.success = False + self.query_exec_tracker.publish() + + return ( + "Unfortunately, I was not able to answer your question, " + "because of the following error:\n" + f"\n{e}\n" + ) diff --git a/pandasai/pipelines/chat/prompt_generation.py b/pandasai/pipelines/chat/prompt_generation.py new file mode 100644 index 000000000..6b48b05eb --- /dev/null +++ b/pandasai/pipelines/chat/prompt_generation.py @@ -0,0 +1,68 @@ +from typing import Any, Union + +from pandasai.pipelines.logic_unit_output import LogicUnitOutput + +from ...helpers.logger import Logger +from ...prompts.base import BasePrompt +from ...prompts.generate_python_code import GeneratePythonCodePrompt +from ...prompts.generate_python_code_with_sql import GeneratePythonCodeWithSQLPrompt +from ..base_logic_unit import BaseLogicUnit +from ..pipeline_context import PipelineContext + + 
+class PromptGeneration(BaseLogicUnit): + """ + Code Prompt Generation Stage + """ + + pass + + def execute(self, input: Any, **kwargs) -> Any: + """ + This method will return output according to + Implementation. + + :param input: Your input data. + :param kwargs: A dictionary of keyword arguments. + - 'logger' (any): The logger for logging. + - 'config' (Config): Global configurations for the test + - 'context' (any): The execution context. + + :return: The result of the execution. + """ + self.context: PipelineContext = kwargs.get("context") + self.logger: Logger = kwargs.get("logger") + + prompt = self.get_chat_prompt(self.context) + self.logger.log(f"Using prompt: {prompt}") + + return LogicUnitOutput( + prompt, + True, + "Prompt Generated Successfully", + {"content_type": "prompt", "value": prompt.to_string()}, + ) + + def get_chat_prompt(self, context: PipelineContext) -> Union[str, BasePrompt]: + # set matplotlib as the default library + viz_lib = "matplotlib" + if context.config.data_viz_library: + viz_lib = context.config.data_viz_library + + output_type = context.get("output_type") + + return ( + GeneratePythonCodeWithSQLPrompt( + context=context, + last_code_generated=context.get("last_code_generated"), + viz_lib=viz_lib, + output_type=output_type, + ) + if context.config.direct_sql + else GeneratePythonCodePrompt( + context=context, + last_code_generated=context.get("last_code_generated"), + viz_lib=viz_lib, + output_type=output_type, + ) + ) diff --git a/pandasai/pipelines/smart_datalake_chat/result_parsing.py b/pandasai/pipelines/chat/result_parsing.py similarity index 69% rename from pandasai/pipelines/smart_datalake_chat/result_parsing.py rename to pandasai/pipelines/chat/result_parsing.py index 406174a46..0721d3ca1 100644 --- a/pandasai/pipelines/smart_datalake_chat/result_parsing.py +++ b/pandasai/pipelines/chat/result_parsing.py @@ -1,5 +1,9 @@ from typing import Any +from pandasai.pipelines.logic_unit_output import LogicUnitOutput + +from 
...responses.context import Context +from ...responses.response_parser import ResponseParser from ..base_logic_unit import BaseLogicUnit from ..pipeline_context import PipelineContext @@ -12,6 +16,14 @@ class ResultParsing(BaseLogicUnit): pass + def response_parser(self, context: PipelineContext, logger) -> ResponseParser: + context = Context(context.config, logger=logger) + return ( + context.config.response_parser(context) + if context.config.response_parser + else ResponseParser(context) + ) + def execute(self, input: Any, **kwargs) -> Any: """ This method will return output according to @@ -31,10 +43,9 @@ def execute(self, input: Any, **kwargs) -> Any: self._add_result_to_memory(result=result, context=pipeline_context) - result = pipeline_context.query_exec_tracker.execute_func( - pipeline_context.get_intermediate_value("response_parser").parse, result - ) - return result + parser = self.response_parser(pipeline_context, logger=kwargs.get("logger")) + result = parser.parse(result) + return LogicUnitOutput(result, True, "Results parsed successfully") def _add_result_to_memory(self, result: dict, context: PipelineContext): """ diff --git a/pandasai/pipelines/smart_datalake_chat/result_validation.py b/pandasai/pipelines/chat/result_validation.py similarity index 55% rename from pandasai/pipelines/smart_datalake_chat/result_validation.py rename to pandasai/pipelines/chat/result_validation.py index 7a92c083f..1154ca5f3 100644 --- a/pandasai/pipelines/smart_datalake_chat/result_validation.py +++ b/pandasai/pipelines/chat/result_validation.py @@ -2,7 +2,9 @@ from typing import Any from pandasai.helpers.logger import Logger +from pandasai.pipelines.logic_unit_output import LogicUnitOutput +from ...helpers.output_validator import OutputValidator from ..base_logic_unit import BaseLogicUnit from ..pipeline_context import PipelineContext @@ -31,37 +33,26 @@ def execute(self, input: Any, **kwargs) -> Any: logger: Logger = kwargs.get("logger") result = input + success = 
False + message = None if result is not None: if isinstance(result, dict): ( validation_ok, validation_logs, - ) = pipeline_context.get_intermediate_value( - "output_type_helper" - ).validate(result) + ) = OutputValidator.validate( + pipeline_context.get("output_type"), result + ) if not validation_ok: logger.log("\n".join(validation_logs), level=logging.WARNING) - pipeline_context.query_exec_tracker.add_step( - { - "type": "Validating Output", - "success": False, - "message": "Output Validation Failed", - } - ) + success = False + message = "Output Validation Failed" + else: - pipeline_context.query_exec_tracker.add_step( - { - "type": "Validating Output", - "success": True, - "message": "Output Validation Successful", - } - ) + success = True + message = "Output Validation Successful" - pipeline_context.add_intermediate_value("last_result", result) + pipeline_context.add("last_result", result) logger.log(f"Answer: {result}") - logger.log( - f"Executed in: {pipeline_context.query_exec_tracker.get_execution_time()}s" - ) - - return result + return LogicUnitOutput(result, success, message) diff --git a/pandasai/pipelines/chat/validate_pipeline_input.py b/pandasai/pipelines/chat/validate_pipeline_input.py new file mode 100644 index 000000000..aa07073b9 --- /dev/null +++ b/pandasai/pipelines/chat/validate_pipeline_input.py @@ -0,0 +1,53 @@ +from typing import Any, List + +from pandasai.connectors.sql import SQLConnector +from pandasai.exceptions import InvalidConfigError +from pandasai.pipelines.logic_unit_output import LogicUnitOutput + +from ...connectors import BaseConnector +from ..base_logic_unit import BaseLogicUnit +from ..pipeline_context import PipelineContext + + +class ValidatePipelineInput(BaseLogicUnit): + """ + Validates pipeline input + """ + + pass + + def _validate_direct_sql(self, dfs: List[BaseConnector]) -> bool: + """ + Raises error if they don't belong sqlconnector or have different credentials + Args: + dfs (List[BaseConnector]): list of 
BaseConnectors + + Raises: + InvalidConfigError: Raise Error in case of config is set but criteria is not met + """ + + if self.context.config.direct_sql: + if all((isinstance(df, SQLConnector) and df.equals(dfs[0])) for df in dfs): + return True + else: + raise InvalidConfigError( + "Direct requires all SQLConnector and they belong to same datasource " + "and have same credentials" + ) + return False + + def execute(self, input: Any, **kwargs) -> Any: + """ + This method validates pipeline context and configs + + :param input: Your input data. + :param kwargs: A dictionary of keyword arguments. + - 'logger' (any): The logger for logging. + - 'config' (Config): Global configurations for the test + - 'context' (any): The execution context. + + :return: The result of the execution. + """ + self.context: PipelineContext = kwargs.get("context") + self._validate_direct_sql(self.context.dfs) + return LogicUnitOutput(input, True, "Input Validation Successful") diff --git a/pandasai/pipelines/logic_unit_output.py b/pandasai/pipelines/logic_unit_output.py new file mode 100644 index 000000000..bbbca7609 --- /dev/null +++ b/pandasai/pipelines/logic_unit_output.py @@ -0,0 +1,29 @@ +from dataclasses import dataclass +from typing import Any + + +@dataclass +class LogicUnitOutput: + """ + Pipeline step output + """ + + output: Any + message: str + success: bool + metadata: dict + final_track_output: bool + + def __init__( + self, + output: Any = None, + success: bool = False, + message: str = None, + metadata: dict = None, + final_track_output: bool = False, + ): + self.output = output + self.message = message + self.metadata = metadata + self.success = success + self.final_track_output = final_track_output diff --git a/pandasai/pipelines/pipeline.py b/pandasai/pipelines/pipeline.py index 7cb39f7c5..a534d990f 100644 --- a/pandasai/pipelines/pipeline.py +++ b/pandasai/pipelines/pipeline.py @@ -1,13 +1,16 @@ import logging +import time from typing import Any, List, Optional, Union 
-from pandasai.config import load_config +from pandasai.config import load_config_from_json from pandasai.exceptions import UnSupportedLogicUnit -from pandasai.helpers.df_info import DataFrameType from pandasai.helpers.logger import Logger +from pandasai.helpers.query_exec_tracker import QueryExecTracker from pandasai.pipelines.base_logic_unit import BaseLogicUnit +from pandasai.pipelines.logic_unit_output import LogicUnitOutput from pandasai.pipelines.pipeline_context import PipelineContext +from ..connectors import BaseConnector from ..schemas.df_config import Config from .abstract_pipeline import AbstractPipeline @@ -20,11 +23,13 @@ class Pipeline(AbstractPipeline): _context: PipelineContext _logger: Logger _steps: List[BaseLogicUnit] + _query_exec_tracker: Optional[QueryExecTracker] def __init__( self, - context: Union[List[Union[DataFrameType, Any]], PipelineContext] = None, + context: Union[List[BaseConnector], PipelineContext], config: Optional[Union[Config, dict]] = None, + query_exec_tracker: Optional[QueryExecTracker] = None, steps: Optional[List] = None, logger: Optional[Logger] = None, ): @@ -39,11 +44,9 @@ def __init__( """ if not isinstance(context, PipelineContext): - from pandasai.smart_dataframe import load_smartdataframes - - config = Config(**load_config(config)) - smart_dfs = load_smartdataframes(context, config) - context = PipelineContext(smart_dfs, config) + config = Config(**load_config_from_json(config)) + connectors = context + context = PipelineContext(connectors, config) self._logger = ( Logger(save_logs=context.config.save_logs, verbose=context.config.verbose) @@ -53,6 +56,9 @@ def __init__( self._context = context self._steps = steps or [] + self._query_exec_tracker = query_exec_tracker or QueryExecTracker( + server_config=self._context.config.log_server + ) def add_step(self, logic: BaseLogicUnit): """ @@ -79,18 +85,53 @@ def run(self, data: Any = None) -> Any: """ try: for index, logic in enumerate(self._steps): + # Callback function 
before execution + if logic.before_execution is not None: + logic.before_execution(data) + self._logger.log(f"Executing Step {index}: {logic.__class__.__name__}") if logic.skip_if is not None and logic.skip_if(self._context): + self._logger.log(f"Executing Step {index}: Skipping...") continue - data = logic.execute( + start_time = time.time() + + # Execute the logic unit + step_output = logic.execute( data, logger=self._logger, config=self._context.config, context=self._context, ) + execution_time = time.time() - start_time + + # Track the execution step of pipeline + if isinstance(step_output, LogicUnitOutput): + self._query_exec_tracker.add_step( + { + "type": logic.__class__.__name__, + "success": step_output.success, + "message": step_output.message, + "execution_time": execution_time, + "data": step_output.metadata, + } + ) + + if step_output.final_track_output: + self._query_exec_tracker.set_final_response( + step_output.metadata + ) + + data = step_output.output + else: + data = step_output + + # Callback function after execution + if logic.on_execution is not None: + logic.on_execution(data) + except Exception as e: self._logger.log(f"Pipeline failed on step {index}: {e}", logging.ERROR) raise e diff --git a/pandasai/pipelines/pipeline_context.py b/pandasai/pipelines/pipeline_context.py index 83bcd8782..175a8dfd9 100644 --- a/pandasai/pipelines/pipeline_context.py +++ b/pandasai/pipelines/pipeline_context.py @@ -1,11 +1,12 @@ from typing import Any, List, Optional, Union from pandasai.helpers.cache import Cache -from pandasai.helpers.df_info import DataFrameType from pandasai.helpers.memory import Memory -from pandasai.helpers.query_exec_tracker import QueryExecTracker from pandasai.helpers.skills_manager import SkillsManager from pandasai.schemas.df_config import Config +from pandasai.vectorstores.vectorstore import VectorStore + +from ..connectors import BaseConnector class PipelineContext: @@ -13,65 +14,44 @@ class PipelineContext: Pass Context to the 
pipeline which is accessible to each step via kwargs """ - _dfs: List[Union[DataFrameType, Any]] - _memory: Memory - _skills: SkillsManager - _cache: Optional[Cache] - _config: Config - _query_exec_tracker: QueryExecTracker - _intermediate_values: dict - def __init__( self, - dfs: List[Union[DataFrameType, Any]], + dfs: List[BaseConnector], config: Optional[Union[Config, dict]] = None, memory: Optional[Memory] = None, - skills: Optional[SkillsManager] = None, + skills_manager: Optional[SkillsManager] = None, cache: Optional[Cache] = None, - query_exec_tracker: Optional[QueryExecTracker] = None, + vectorstore: VectorStore = None, + initial_values: dict = None, ) -> None: - from pandasai.smart_dataframe import load_smartdataframes - if isinstance(config, dict): config = Config(**config) - self._dfs = load_smartdataframes(dfs, config) - self._memory = memory if memory is not None else Memory() - self._skills = skills if skills is not None else SkillsManager() + self.dfs = dfs + self.memory = memory or Memory() + self.skills_manager = skills_manager or SkillsManager() + if config.enable_cache: - self._cache = cache if cache is not None else Cache() + self.cache = cache if cache is not None else Cache() else: - self._cache = None - self._config = config - self._query_exec_tracker = query_exec_tracker - self._intermediate_values = {} + self.cache = None - @property - def dfs(self) -> List[Union[DataFrameType, Any]]: - return self._dfs + self.config = config - @property - def memory(self): - return self._memory + self.intermediate_values = initial_values or {} - @property - def skills(self): - return self._skills + self.vectorstore = vectorstore - @property - def cache(self): - return self._cache + self._initial_values = initial_values - @property - def config(self): - return self._config + def reset_intermediate_values(self): + self.intermediate_values = self._initial_values or {} - @property - def query_exec_tracker(self): - return self._query_exec_tracker + def 
add(self, key: str, value: Any): + self.intermediate_values[key] = value - def add_intermediate_value(self, key: str, value: Any): - self._intermediate_values[key] = value + def add_many(self, values: dict): + self.intermediate_values.update(values) - def get_intermediate_value(self, key: str): - return self._intermediate_values.get(key, "") + def get(self, key: str): + return self.intermediate_values.get(key, "") diff --git a/pandasai/pipelines/smart_datalake_chat/code_execution.py b/pandasai/pipelines/smart_datalake_chat/code_execution.py deleted file mode 100644 index bedd0ca98..000000000 --- a/pandasai/pipelines/smart_datalake_chat/code_execution.py +++ /dev/null @@ -1,143 +0,0 @@ -import logging -import traceback -from typing import Any, List - -from pandasai.exceptions import InvalidLLMOutputType -from pandasai.prompts.base import AbstractPrompt -from pandasai.prompts.correct_output_type_error_prompt import ( - CorrectOutputTypeErrorPrompt, -) - -from ...helpers.code_manager import CodeExecutionContext -from ...helpers.logger import Logger -from ...prompts.correct_error_prompt import CorrectErrorPrompt -from ..base_logic_unit import BaseLogicUnit -from ..pipeline_context import PipelineContext - - -class CodeExecution(BaseLogicUnit): - """ - Code Execution Stage - """ - - pass - - def execute(self, input: Any, **kwargs) -> Any: - """ - This method will return output according to - Implementation. - - :param input: Your input data. - :param kwargs: A dictionary of keyword arguments. - - 'logger' (any): The logger for logging. - - 'config' (Config): Global configurations for the test - - 'context' (any): The execution context. - - :return: The result of the execution. 
- """ - pipeline_context: PipelineContext = kwargs.get("context") - logger: Logger = kwargs.get("logger") - - code = input - retry_count = 0 - code_to_run = code - result = None - while retry_count < pipeline_context.config.max_retries: - try: - # Execute the code - code_context = CodeExecutionContext( - pipeline_context.get_intermediate_value("last_prompt_id"), - pipeline_context.get_intermediate_value("skills"), - ) - - result = pipeline_context.query_exec_tracker.execute_func( - pipeline_context.get_intermediate_value( - "code_manager" - ).execute_code, - code=code_to_run, - context=code_context, - ) - - if output_helper := pipeline_context.get_intermediate_value( - "output_type_helper" - ): - (validation_ok, validation_errors) = output_helper.validate(result) - - if not validation_ok: - raise InvalidLLMOutputType(validation_errors) - - break - - except Exception as e: - if ( - not pipeline_context.config.use_error_correction_framework - or retry_count >= pipeline_context.config.max_retries - 1 - ): - raise e - - retry_count += 1 - - logger.log( - f"Failed to execute code with a correction framework " - f"[retry number: {retry_count}]", - level=logging.WARNING, - ) - - traceback_error = traceback.format_exc() - - # Get Error Prompt for retry - error_prompt = self._get_error_prompt(e) - code_to_run = pipeline_context.query_exec_tracker.execute_func( - self._retry_run_code, - code, - pipeline_context, - logger, - traceback_error, - error_prompt, - ) - - return result - - def _get_error_prompt(self, e: Exception) -> AbstractPrompt: - if isinstance(e, InvalidLLMOutputType): - return CorrectOutputTypeErrorPrompt() - else: - return CorrectErrorPrompt() - - def _retry_run_code( - self, - code: str, - context: PipelineContext, - logger: Logger, - e: Exception, - error_prompt=CorrectErrorPrompt(), - ) -> List: - """ - A method to retry the code execution with error correction framework. 
- - Args: - code (str): A python code - context (PipelineContext) : Pipeline Context - logger (Logger) : Logger - e (Exception): An exception - dataframes - - Returns (str): A python code - """ - logger.log(f"Failed with error: {e}. Retrying", logging.ERROR) - - default_values = { - "engine": context.dfs[0].engine, - "code": code, - "error_returned": e, - "output_type_hint": context.get_intermediate_value( - "output_type_helper" - ).template_hint, - } - error_correcting_instruction = context.get_intermediate_value("get_prompt")( - "correct_error", - default_prompt=error_prompt, - default_values=default_values, - ) - - return context.config.llm.generate_code(error_correcting_instruction) diff --git a/pandasai/pipelines/smart_datalake_chat/generate_smart_datalake_pipeline.py b/pandasai/pipelines/smart_datalake_chat/generate_smart_datalake_pipeline.py deleted file mode 100644 index 97090eb49..000000000 --- a/pandasai/pipelines/smart_datalake_chat/generate_smart_datalake_pipeline.py +++ /dev/null @@ -1,50 +0,0 @@ -from typing import Optional - -from ...helpers.logger import Logger -from ..pipeline import Pipeline -from ..pipeline_context import PipelineContext -from .cache_lookup import CacheLookup -from .cache_population import CachePopulation -from .code_execution import CodeExecution -from .code_generator import CodeGenerator -from .prompt_generation import PromptGeneration -from .result_parsing import ResultParsing -from .result_validation import ResultValidation - - -class GenerateSmartDatalakePipeline: - _pipeline: Pipeline - - def __init__( - self, - context: Optional[PipelineContext] = None, - logger: Optional[Logger] = None, - ): - self._pipeline = Pipeline( - context=context, - logger=logger, - steps=[ - CacheLookup(), - PromptGeneration( - skip_if=lambda pipeline_context: pipeline_context.get_intermediate_value( - "is_present_in_cache" - ) - ), - CodeGenerator( - skip_if=lambda pipeline_context: pipeline_context.get_intermediate_value( - 
"is_present_in_cache" - ) - ), - CachePopulation( - skip_if=lambda pipeline_context: pipeline_context.get_intermediate_value( - "is_present_in_cache" - ) - ), - CodeExecution(), - ResultValidation(), - ResultParsing(), - ], - ) - - def run(self): - return self._pipeline.run() diff --git a/pandasai/pipelines/smart_datalake_chat/prompt_generation.py b/pandasai/pipelines/smart_datalake_chat/prompt_generation.py deleted file mode 100644 index 98a008959..000000000 --- a/pandasai/pipelines/smart_datalake_chat/prompt_generation.py +++ /dev/null @@ -1,73 +0,0 @@ -from typing import Any - -from ...prompts.direct_sql_prompt import DirectSQLPrompt -from ...prompts.file_based_prompt import FileBasedPrompt -from ...prompts.generate_python_code import GeneratePythonCodePrompt -from ..base_logic_unit import BaseLogicUnit -from ..pipeline_context import PipelineContext - - -class PromptGeneration(BaseLogicUnit): - """ - Code Prompt Generation Stage - """ - - pass - - def _get_chat_prompt(self, context: PipelineContext) -> [str, FileBasedPrompt]: - key = ( - "direct_sql_prompt" if context.config.direct_sql else "generate_python_code" - ) - return ( - key, - ( - DirectSQLPrompt(tables=context.dfs) - if context.config.direct_sql - else GeneratePythonCodePrompt() - ), - ) - - def execute(self, input: Any, **kwargs) -> Any: - """ - This method will return output according to - Implementation. - - :param input: Your input data. - :param kwargs: A dictionary of keyword arguments. - - 'logger' (any): The logger for logging. - - 'config' (Config): Global configurations for the test - - 'context' (any): The execution context. - - :return: The result of the execution. 
- """ - pipeline_context: PipelineContext = kwargs.get("context") - - default_values = { - # TODO: find a better way to determine the engine, - "engine": pipeline_context.dfs[0].engine, - "output_type_hint": pipeline_context.get_intermediate_value( - "output_type_helper" - ).template_hint, - "viz_library_type": pipeline_context.get_intermediate_value( - "viz_lib_helper" - ).template_hint, - } - - if ( - pipeline_context.memory.size > 1 - and pipeline_context.memory.count() > 1 - and pipeline_context.get_intermediate_value("last_code_generated") - ): - default_values["current_code"] = pipeline_context.get_intermediate_value( - "last_code_generated" - ) - default_values["code_description"] = "" - - [key, default_prompt] = self._get_chat_prompt(pipeline_context) - - return pipeline_context.query_exec_tracker.execute_func( - pipeline_context.get_intermediate_value("get_prompt"), - key=key, - default_prompt=default_prompt, - default_values=default_values, - ) diff --git a/pandasai/pipelines/synthetic_dataframe/generate_sdf_pipeline.py b/pandasai/pipelines/synthetic_dataframe/generate_sdf_pipeline.py deleted file mode 100644 index 1f5996151..000000000 --- a/pandasai/pipelines/synthetic_dataframe/generate_sdf_pipeline.py +++ /dev/null @@ -1,37 +0,0 @@ -from logging import Logger -from typing import Optional - -from pandasai.pipelines.logic_units.output_logic_unit import ProcessOutput -from pandasai.pipelines.logic_units.prompt_execution import PromptExecution -from pandasai.pipelines.pipeline import Pipeline -from pandasai.pipelines.pipeline_context import PipelineContext -from pandasai.pipelines.synthetic_dataframe.sdf_code_executor import ( - SDFCodeExecutor, -) -from pandasai.pipelines.synthetic_dataframe.synthetic_df_prompt import ( - SyntheticDataframePrompt, -) - - -class GenerateSDFPipeline: - _pipeline: Pipeline - - def __init__( - self, - amount: int = 100, - context: Optional[PipelineContext] = None, - logger: Optional[Logger] = None, - ): - self._pipeline = 
Pipeline( - context=context, - logger=logger, - steps=[ - SyntheticDataframePrompt(amount=amount), - PromptExecution(), - SDFCodeExecutor(), - ProcessOutput(), - ], - ) - - def run(self): - return self._pipeline.run() diff --git a/pandasai/pipelines/synthetic_dataframe/sdf_code_executor.py b/pandasai/pipelines/synthetic_dataframe/sdf_code_executor.py deleted file mode 100644 index fdf5e8c53..000000000 --- a/pandasai/pipelines/synthetic_dataframe/sdf_code_executor.py +++ /dev/null @@ -1,27 +0,0 @@ -from typing import Any - -from pandasai.pipelines.base_logic_unit import BaseLogicUnit -from pandasai.pipelines.logic_units.code_executor import BaseCodeExecutor - - -class SDFCodeExecutor(BaseLogicUnit): - """ - Executes the code generated by the prompt - """ - - def execute(self, input: Any, **kwargs) -> Any: - code_exec = BaseCodeExecutor() - logger = kwargs.get("logger") - try: - logger.log(f"Executing code: {input}") - namespace = code_exec.execute(input, **kwargs) - - if "df" not in namespace: - raise ValueError(f"Unable to execute code: {input}") - - return {"type": "dataframe", "value": namespace["df"]} - - except Exception: - if logger is not None: - logger.log("Error in executing code") - raise diff --git a/pandasai/pipelines/synthetic_dataframe/synthetic_df_prompt.py b/pandasai/pipelines/synthetic_dataframe/synthetic_df_prompt.py deleted file mode 100644 index c6a0f8888..000000000 --- a/pandasai/pipelines/synthetic_dataframe/synthetic_df_prompt.py +++ /dev/null @@ -1,36 +0,0 @@ -from typing import Any - -from pandasai.pipelines.base_logic_unit import BaseLogicUnit -from pandasai.pipelines.pipeline_context import PipelineContext -from pandasai.prompts.generate_synthetic_df_prompt import GenerateSyntheticDfPrompt - - -class SyntheticDataframePrompt(BaseLogicUnit): - """ - Generates the prompt for generating synthetic dataframe - """ - - _amount: int = 100 - - def __init__(self, amount: int = 100): - """ - Initialize the logic unit with the given parameters - 
Args: - amount (int): Amount of rows to generate - """ - self._amount = amount - - def execute(self, input: Any, **kwargs) -> Any: - context: PipelineContext = kwargs.get("context") - logger = kwargs.get("logger") - - if context is None or len(context.dfs) == 0: - raise ValueError("Dataframe not found") - - prompt = GenerateSyntheticDfPrompt( - amount=self._amount, - dataframe=context.dfs[kwargs.get("dataframe_index", 0)], - ) - logger.log(f"Generate Prompt: {prompt}") - - return prompt diff --git a/pandasai/prompts/__init__.py b/pandasai/prompts/__init__.py index 4b520f5f7..bd61e860a 100644 --- a/pandasai/prompts/__init__.py +++ b/pandasai/prompts/__init__.py @@ -1,11 +1,9 @@ -from .base import AbstractPrompt +from .base import BasePrompt from .correct_error_prompt import CorrectErrorPrompt -from .file_based_prompt import FileBasedPrompt from .generate_python_code import GeneratePythonCodePrompt __all__ = [ - "AbstractPrompt", + "BasePrompt", "CorrectErrorPrompt", "GeneratePythonCodePrompt", - "FileBasedPrompt", ] diff --git a/pandasai/prompts/base.py b/pandasai/prompts/base.py index 9edf90fb3..062397b5e 100644 --- a/pandasai/prompts/base.py +++ b/pandasai/prompts/base.py @@ -1,118 +1,63 @@ """ Base class to implement a new Prompt In order to better handle the instructions, this prompt module is written. """ -import string -from abc import ABC, abstractmethod +import os +import re +from pathlib import Path +from typing import Optional +from jinja2 import Environment, FileSystemLoader -class AbstractPrompt(ABC): + +class BasePrompt: """Base class to implement a new Prompt. Inheritors have to override `template` property. 
""" - _args: dict = None - _config: dict = None + template: Optional[str] = None + template_path: Optional[str] = None def __init__(self, **kwargs): - """ - __init__ method of Base class of Prompt Module - Args: - **kwargs: Inferred Keyword Arguments - """ - if self._args is None: - self._args = {} - - self._args.update(kwargs) - self.setup(**kwargs) - - def setup(self, **kwargs) -> None: - pass - - def on_prompt_generation(self) -> None: - pass - - def _generate_dataframes(self, dfs): - """ - Generate the dataframes metadata - Args: - dfs: List of Dataframes - """ - dataframes = [] - for index, df in enumerate(dfs, start=1): - dataframe_info = " str: - ... - - def set_config(self, config): - self._config = config - - def get_config(self, key=None): - if self._config is None: - return None - if key is None: - return self._config - if hasattr(self._config, key): - return getattr(self._config, key) - - def set_var(self, var, value): - if self._args is None: - self._args = {} - - if var == "dfs": - self._args["dataframes"] = self._generate_dataframes(value) - self._args[var] = value - - def set_vars(self, vars): - if self._args is None: - self._args = {} - self._args.update(vars) + return render def to_string(self): - self.on_prompt_generation() - - prompt_args = {} - for key, value in self._args.items(): - if isinstance(value, AbstractPrompt): - args = [ - arg[1] for arg in string.Formatter().parse(value.template) if arg[1] - ] - value.set_vars( - {k: v for k, v in self._args.items() if k != key and k in args} - ) - prompt_args[key] = value.to_string() - else: - prompt_args[key] = value - - return self.template.format_map(prompt_args) + """Render the prompt.""" + if self._resolved_prompt is None: + self._resolved_prompt = self.prompt.render(**self.props) + + return self._resolved_prompt def __str__(self): return self.to_string() def validate(self, output: str) -> bool: return isinstance(output, str) + + def to_json(self): + """ + Return Json Prompt + """ + raise 
NotImplementedError("Implementation required") diff --git a/pandasai/prompts/check_if_relevant_to_conversation.py b/pandasai/prompts/check_if_relevant_to_conversation.py index 1280e9960..39bf4056d 100644 --- a/pandasai/prompts/check_if_relevant_to_conversation.py +++ b/pandasai/prompts/check_if_relevant_to_conversation.py @@ -1,19 +1,7 @@ -""" Prompt to check if the query is related to the previous conversation +from .base import BasePrompt - -{conversation} - - -{query} - +class CheckIfRelevantToConversationPrompt(BasePrompt): + """Prompt to generate Python code from a dataframe.""" -Is the query related to the conversation? Answer only "true" or "false" (lowercase). -""" -from .file_based_prompt import FileBasedPrompt - - -class CheckIfRelevantToConversationPrompt(FileBasedPrompt): - """Prompt to check if the query is related to the previous conversation""" - - _path_to_template = "assets/prompt_templates/check_if_relevant_to_conversation.tmpl" + template_path = "check_if_relevant_to_conversation.tmpl" diff --git a/pandasai/prompts/clarification_questions_prompt.py b/pandasai/prompts/clarification_questions_prompt.py index 7685e9b2e..221e2a289 100644 --- a/pandasai/prompts/clarification_questions_prompt.py +++ b/pandasai/prompts/clarification_questions_prompt.py @@ -1,39 +1,13 @@ -""" Prompt to get clarification questions -You are provided with the following pandas DataFrames: - -{dataframes} - - -{conversation} - - -Based on the conversation, are there any clarification questions that a senior data scientist would ask? These are questions for non technical people, only ask for questions they could ask given low tech expertise and no knowledge about how the dataframes are structured. - -Return the JSON array of the clarification questions. If there is no clarification question, return an empty array. 
- -Json: -""" # noqa: E501 - - import json from typing import List -import pandasai.pandas as pd - -from .file_based_prompt import FileBasedPrompt - +from .base import BasePrompt -class ClarificationQuestionPrompt(FileBasedPrompt): - """Prompt to get clarification questions""" - _path_to_template = "assets/prompt_templates/clarification_questions_prompt.tmpl" +class ClarificationQuestionPrompt(BasePrompt): + """Prompt to generate Python code with SQL from a dataframe.""" - def setup( - self, dataframes: List[pd.DataFrame], conversation: str, query: str - ) -> None: - self.set_var("dfs", dataframes) - self.set_var("conversation", conversation) - self.set_var("query", query) + template_path = "clarification_questions_prompt.tmpl" def validate(self, output) -> bool: try: diff --git a/pandasai/prompts/correct_error_prompt.py b/pandasai/prompts/correct_error_prompt.py index f340a7df4..6175dc486 100644 --- a/pandasai/prompts/correct_error_prompt.py +++ b/pandasai/prompts/correct_error_prompt.py @@ -1,22 +1,31 @@ -""" Prompt to correct Python Code on Error -``` -{dataframes} +from .base import BasePrompt -{conversation} -You generated this python code: -{code} +class CorrectErrorPrompt(BasePrompt): + """Prompt to generate Python code from a dataframe.""" -It fails with the following error: -{error_returned} + template_path = "correct_error_prompt.tmpl" -Fix the python code above and return the new python code: -""" # noqa: E501 + def to_json(self): + context = self.props["context"] + code = self.props["code"] + error = self.props["error"] + memory = context.memory + conversations = memory.to_json() -from .file_based_prompt import FileBasedPrompt + system_prompt = memory.get_system_prompt() + # prepare datasets + datasets = [dataset.to_json() for dataset in context.dfs] -class CorrectErrorPrompt(FileBasedPrompt): - """Prompt to Correct Python code on Error""" - - _path_to_template = "assets/prompt_templates/correct_error_prompt.tmpl" + return { + "datasets": datasets, + 
"conversation": conversations, + "system_prompt": system_prompt, + "error": { + "code": code, + "error_trace": str(error), + "exception_type": "Exception", + }, + "config": {"direct_sql": context.config.direct_sql}, + } diff --git a/pandasai/prompts/correct_execute_sql_query_usage_error_prompt.py b/pandasai/prompts/correct_execute_sql_query_usage_error_prompt.py new file mode 100644 index 000000000..44e496ee9 --- /dev/null +++ b/pandasai/prompts/correct_execute_sql_query_usage_error_prompt.py @@ -0,0 +1,31 @@ +from pandasai.prompts.base import BasePrompt + + +class CorrectExecuteSQLQueryUsageErrorPrompt(BasePrompt): + """Prompt to generate Python code from a dataframe.""" + + template_path = "correct_execute_sql_query_usage_error_prompt.tmpl" + + def to_json(self): + context = self.props["context"] + code = self.props["code"] + error = self.props["error"] + memory = context.memory + conversations = memory.to_json() + + system_prompt = memory.get_system_prompt() + + # prepare datasets + datasets = [dataset.to_json() for dataset in context.dfs] + + return { + "datasets": datasets, + "conversation": conversations, + "system_prompt": system_prompt, + "error": { + "code": code, + "error_trace": str(error), + "exception_type": "ExecuteSQLQueryNotUsed", + }, + "config": {"direct_sql": context.config.direct_sql}, + } diff --git a/pandasai/prompts/correct_output_type_error_prompt.py b/pandasai/prompts/correct_output_type_error_prompt.py index 930265218..52d2d9722 100644 --- a/pandasai/prompts/correct_output_type_error_prompt.py +++ b/pandasai/prompts/correct_output_type_error_prompt.py @@ -1,22 +1,35 @@ -""" Prompt to correct Output Type Python Code on Error -``` -{dataframes} - -{conversation} - -You generated this python code: -{code} - -It fails with the following error: -{error_returned} - -Fix the python code above and return the new python code but the result type should be: -""" # noqa: E501 - -from .file_based_prompt import FileBasedPrompt - - -class 
CorrectOutputTypeErrorPrompt(FileBasedPrompt): - """Prompt to Correct Python code on Error""" - - _path_to_template = "assets/prompt_templates/correct_output_type_error_prompt.tmpl" +from .base import BasePrompt + + +class CorrectOutputTypeErrorPrompt(BasePrompt): + """Prompt to generate Python code from a dataframe.""" + + template_path = "correct_output_type_error_prompt.tmpl" + + def to_json(self): + context = self.props["context"] + code = self.props["code"] + error = self.props["error"] + output_type = self.props["output_type"] + memory = context.memory + conversations = memory.to_json() + + system_prompt = memory.get_system_prompt() + + # prepare datasets + datasets = [dataset.to_json() for dataset in context.dfs] + + return { + "datasets": datasets, + "conversation": conversations, + "system_prompt": system_prompt, + "error": { + "code": code, + "error_trace": str(error), + "exception_type": "InvalidLLMOutputType", + }, + "config": { + "direct_sql": context.config.direct_sql, + "output_type": output_type, + }, + } diff --git a/pandasai/prompts/direct_sql_prompt.py b/pandasai/prompts/direct_sql_prompt.py index d48c8afdb..204e2c31b 100644 --- a/pandasai/prompts/direct_sql_prompt.py +++ b/pandasai/prompts/direct_sql_prompt.py @@ -1,5 +1,14 @@ """ Prompt to explain code generation by the LLM""" -from .generate_python_code import CurrentCodePrompt, GeneratePythonCodePrompt +from pandasai.helpers.dataframe_serializer import ( + DataframeSerializer, + DataframeSerializerType, +) + +from .generate_python_code import ( + CurrentCodePrompt, + GeneratePythonCodePrompt, + SimpleReasoningPrompt, +) class DirectSQLPrompt(GeneratePythonCodePrompt): @@ -7,22 +16,39 @@ class DirectSQLPrompt(GeneratePythonCodePrompt): _path_to_template = "assets/prompt_templates/direct_sql_connector.tmpl" - def _prepare_tables_data(self, tables): + def _prepare_tables_data(self, tables, config): tables_join = [] - for table in tables: - table_description_tag = ( - f' 
description="{table.table_description}"' - if table.table_description is not None - else "" + for index, table in enumerate(tables): + table_serialized = DataframeSerializer().serialize( + table, + { + "index": index, + "type": "sql" if config and config.direct_sql else "pandas", + }, + ( + config.dataframe_serializer + if config + else DataframeSerializerType.SQL + ), ) - table_head_tag = f'' - table = f"{table_head_tag}\n{table.head_csv}\n
" - tables_join.append(table) + tables_join.append(table_serialized) return "\n\n".join(tables_join) - def setup(self, tables, **kwargs) -> None: - self.set_var("tables", self._prepare_tables_data(tables)) + def setup(self, tables, config=None, **kwargs) -> None: + self.set_var("tables", self._prepare_tables_data(tables, config)) super(DirectSQLPrompt, self).setup(**kwargs) self.set_var("current_code", kwargs.pop("current_code", CurrentCodePrompt())) + self.set_var( + "code_description", + kwargs.pop("code_description", "Update this initial code:"), + ) + self.set_var("last_message", kwargs.pop("last_message", "")) + self.set_var("prev_conversation", kwargs.pop("prev_conversation", "")) + + def on_prompt_generation(self) -> None: + default_import = "import pandas as pd" + + self.set_var("default_import", default_import) + self.set_var("reasoning", SimpleReasoningPrompt()) diff --git a/pandasai/prompts/explain_prompt.py b/pandasai/prompts/explain_prompt.py index b0c3be027..a9a68acfa 100644 --- a/pandasai/prompts/explain_prompt.py +++ b/pandasai/prompts/explain_prompt.py @@ -1,28 +1,7 @@ -""" Prompt to explain code generation by the LLM -The previous conversation we had +from .base import BasePrompt - -{conversation} - -Based on the last conversation you generated the following code: +class ExplainPrompt(BasePrompt): + """Prompt to generate Python code from a dataframe.""" - -{code} - - -Explain how you came up with code for non-technical people without -mentioning technical details or mentioning the libraries used? 
- -""" -from .file_based_prompt import FileBasedPrompt - - -class ExplainPrompt(FileBasedPrompt): - """Prompt to explain code generation by the LLM""" - - _path_to_template = "assets/prompt_templates/explain_prompt.tmpl" - - def setup(self, conversation: str, code: str) -> None: - self.set_var("conversation", conversation) - self.set_var("code", code) + template_path = "explain.tmpl" diff --git a/pandasai/prompts/generate_python_code.py b/pandasai/prompts/generate_python_code.py index acd88ce39..ca089f970 100644 --- a/pandasai/prompts/generate_python_code.py +++ b/pandasai/prompts/generate_python_code.py @@ -1,80 +1,30 @@ -""" Prompt to generate Python code -``` -You are provided with the following pandas DataFrames: +from .base import BasePrompt -{dataframes} - -{conversation} - +class GeneratePythonCodePrompt(BasePrompt): + """Prompt to generate Python code from a dataframe.""" -This is the initial python function. Given the context, use the right dataframes. -{current_code} + template_path = "generate_python_code.tmpl" -Take a deep breath and reason step-by-step. Act as a senior data analyst. -In the answer, you must never write the "technical" names of the tables. 
-Based on the last message in the conversation: -- return the updated analyze_data function wrapped within ```python ```""" # noqa: E501 + def to_json(self): + context = self.props["context"] + viz_lib = self.props["viz_lib"] + output_type = self.props["output_type"] + memory = context.memory + conversations = memory.to_json() + system_prompt = memory.get_system_prompt() -import pandasai.pandas as pd + # prepare datasets + datasets = [dataset.to_json() for dataset in context.dfs] -from .file_based_prompt import FileBasedPrompt - - -class CurrentCodePrompt(FileBasedPrompt): - """The current code""" - - _path_to_template = "assets/prompt_templates/current_code.tmpl" - - def setup(self, **kwargs) -> None: - if kwargs.get("dfs_declared", False): - self.set_var( - "dfs_declared_message", - "The variable `dfs: list[pd.DataFrame]` is already declared.", - ) - else: - self.set_var("dfs_declared_message", "") - - -class SimpleReasoningPrompt(FileBasedPrompt): - """The simple reasoning instructions""" - - _path_to_template = "assets/prompt_templates/simple_reasoning.tmpl" - - -class VizLibraryPrompt(FileBasedPrompt): - """Provide information about the visualization library""" - - _path_to_template = "assets/prompt_templates/viz_library.tmpl" - - -class GeneratePythonCodePrompt(FileBasedPrompt): - """Prompt to generate Python code""" - - _path_to_template = "assets/prompt_templates/generate_python_code.tmpl" - - def setup(self, **kwargs) -> None: - self.set_var("instructions", kwargs.pop("custom_instructions", "")) - - self.set_var( - "current_code", - kwargs.pop("current_code", CurrentCodePrompt(dfs_declared=True)), - ) - - self.set_var( - "code_description", - kwargs.pop("code_description", "Update this initial code:"), - ) - - self.set_var("last_message", kwargs.pop("last_message", "")) - - self.set_var("prev_conversation", kwargs.pop("prev_conversation", "")) - - def on_prompt_generation(self) -> None: - default_import = f"import {pd.__name__} as pd" - engine_df_name = 
"pd.DataFrame" - - self.set_var("default_import", default_import) - self.set_var("engine_df_name", engine_df_name) - self.set_var("reasoning", SimpleReasoningPrompt()) + return { + "datasets": datasets, + "conversation": conversations, + "system_prompt": system_prompt, + "config": { + "direct_sql": context.config.direct_sql, + "viz_lib": viz_lib, + "output_type": output_type, + }, + } diff --git a/pandasai/prompts/generate_python_code_with_sql.py b/pandasai/prompts/generate_python_code_with_sql.py new file mode 100644 index 000000000..4250c49a2 --- /dev/null +++ b/pandasai/prompts/generate_python_code_with_sql.py @@ -0,0 +1,7 @@ +from pandasai.prompts.generate_python_code import GeneratePythonCodePrompt + + +class GeneratePythonCodeWithSQLPrompt(GeneratePythonCodePrompt): + """Prompt to generate Python code with SQL from a dataframe.""" + + template_path = "generate_python_code_with_sql.tmpl" diff --git a/pandasai/prompts/generate_synthetic_df_prompt.py b/pandasai/prompts/generate_synthetic_df_prompt.py deleted file mode 100644 index 1afde2ee9..000000000 --- a/pandasai/prompts/generate_synthetic_df_prompt.py +++ /dev/null @@ -1,7 +0,0 @@ -from pandasai.prompts.file_based_prompt import FileBasedPrompt - - -class GenerateSyntheticDfPrompt(FileBasedPrompt): - """The current code""" - - _path_to_template = "assets/prompt_templates/generate_synthetic_data.tmpl" diff --git a/pandasai/prompts/generate_system_message.py b/pandasai/prompts/generate_system_message.py new file mode 100644 index 000000000..855efd262 --- /dev/null +++ b/pandasai/prompts/generate_system_message.py @@ -0,0 +1,7 @@ +from .base import BasePrompt + + +class GenerateSystemMessagePrompt(BasePrompt): + """Prompt to generate Python code from a dataframe.""" + + template_path = "generate_system_message.tmpl" diff --git a/pandasai/prompts/rephase_query_prompt.py b/pandasai/prompts/rephase_query_prompt.py index e746e00ae..fc542ac2c 100644 --- a/pandasai/prompts/rephase_query_prompt.py +++ 
b/pandasai/prompts/rephase_query_prompt.py @@ -1,35 +1,7 @@ -""" Prompt to rephrase query to get more accurate responses -You are provided with the following pandas DataFrames: +from .base import BasePrompt -{dataframes} -{conversation} -Use the provided dataframe and conversation we have had to Return the rephrased -sentence of "{query}” in order to obtain more accurate and comprehensive responses -without any explanations. -""" -from typing import List +class RephraseQueryPrompt(BasePrompt): + """Prompt to generate Python code from a dataframe.""" -import pandasai.pandas as pd - -from .file_based_prompt import FileBasedPrompt - - -class RephraseQueryPrompt(FileBasedPrompt): - """Prompt to rephrase query to get more accurate responses""" - - _path_to_template = "assets/prompt_templates/rephrase_query_prompt.tmpl" - - def setup( - self, query: str, dataframes: List[pd.DataFrame], conversation: str - ) -> None: - conversation_content = ( - self.conversation_text.format( - conversation=conversation, dataframes=dataframes, query=query - ) - if conversation and dataframes and query - else "" - ) - self.set_var("conversation", conversation_content) - self.set_var("query", query) - self.set_var("dfs", dataframes) + template_path = "rephrase_query.tmpl" diff --git a/pandasai/assets/prompt_templates/check_if_relevant_to_conversation.tmpl b/pandasai/prompts/templates/check_if_relevant_to_conversation.tmpl similarity index 75% rename from pandasai/assets/prompt_templates/check_if_relevant_to_conversation.tmpl rename to pandasai/prompts/templates/check_if_relevant_to_conversation.tmpl index 058867102..87a30ba0b 100644 --- a/pandasai/assets/prompt_templates/check_if_relevant_to_conversation.tmpl +++ b/pandasai/prompts/templates/check_if_relevant_to_conversation.tmpl @@ -1,9 +1,9 @@ -{conversation} +{{context.memory.get_conversation()}} -{query} +{{query}} Is the query somehow related to the previous conversation? Answer only "true" or "false" (lowercase). 
\ No newline at end of file diff --git a/pandasai/assets/prompt_templates/clarification_questions_prompt.tmpl b/pandasai/prompts/templates/clarification_questions_prompt.tmpl similarity index 63% rename from pandasai/assets/prompt_templates/clarification_questions_prompt.tmpl rename to pandasai/prompts/templates/clarification_questions_prompt.tmpl index 268b5a7d2..80bed1662 100644 --- a/pandasai/assets/prompt_templates/clarification_questions_prompt.tmpl +++ b/pandasai/prompts/templates/clarification_questions_prompt.tmpl @@ -1,12 +1,10 @@ -You are provided with the following pandas DataFrames: - -{dataframes} +{% for df in context.dfs %}{% set index = loop.index %}{% include 'shared/dataframe.tmpl' with context %}{% endfor %} -{conversation} +{{context.memory.get_conversation()}} -Find the clarification questions that could be asked to a senior data scientist would ask about the query "{query}"? +Find the clarification questions that could be asked to a senior data scientist would ask about the query "{{query}}"? - Only ask for questions related to the query if the query is not clear or ambiguous and that cannot be deduced from the context. - Return up to 3 questions. 
diff --git a/pandasai/prompts/templates/correct_error_prompt.tmpl b/pandasai/prompts/templates/correct_error_prompt.tmpl new file mode 100644 index 000000000..e51023400 --- /dev/null +++ b/pandasai/prompts/templates/correct_error_prompt.tmpl @@ -0,0 +1,12 @@ +{% for df in context.dfs %}{% set index = loop.index %}{% include 'shared/dataframe.tmpl' with context %}{% endfor %} + +The user asked the following question: +{{context.memory.get_conversation()}} + +You generated this python code: +{{code}} + +It fails with the following error: +{{error}} + +Fix the python code above and return the new python code: \ No newline at end of file diff --git a/pandasai/prompts/templates/correct_execute_sql_query_usage_error_prompt.tmpl b/pandasai/prompts/templates/correct_execute_sql_query_usage_error_prompt.tmpl new file mode 100644 index 000000000..523608f71 --- /dev/null +++ b/pandasai/prompts/templates/correct_execute_sql_query_usage_error_prompt.tmpl @@ -0,0 +1,9 @@ +{% for df in context.dfs %}{% set index = loop.index %}{% include 'shared/dataframe.tmpl' with context %}{% endfor %} + +The user asked the following question: +{{context.memory.get_conversation()}} + +You generated this python code: +{{code}} + +Fix the python code above and return the new python code but the code generated should use execute_sql_query function \ No newline at end of file diff --git a/pandasai/prompts/templates/correct_output_type_error_prompt.tmpl b/pandasai/prompts/templates/correct_output_type_error_prompt.tmpl new file mode 100644 index 000000000..220ba3fa6 --- /dev/null +++ b/pandasai/prompts/templates/correct_output_type_error_prompt.tmpl @@ -0,0 +1,9 @@ +{% for df in context.dfs %}{% set index = loop.index %}{% include 'shared/dataframe.tmpl' with context %}{% endfor %} + +The user asked the following question: +{{context.memory.get_conversation()}} + +You generated this python code: +{{code}} + +Fix the python code above and return the new python code but the result type should be: 
{{output_type}} diff --git a/pandasai/assets/prompt_templates/explain_prompt.tmpl b/pandasai/prompts/templates/explain.tmpl similarity index 65% rename from pandasai/assets/prompt_templates/explain_prompt.tmpl rename to pandasai/prompts/templates/explain.tmpl index cc03c3615..ccb2f4dcf 100644 --- a/pandasai/assets/prompt_templates/explain_prompt.tmpl +++ b/pandasai/prompts/templates/explain.tmpl @@ -1,15 +1,13 @@ - The previous conversation we had -{conversation} +{{context.memory.get_conversation()}} Based on the last conversation you generated the following code: -{code} +{{code}} -Explain how you came up with code for non-technical people without -mentioning technical details or mentioning the libraries used? +Explain how you came up with code for non-technical people without mentioning technical details or mentioning the libraries used? diff --git a/pandasai/prompts/templates/generate_python_code.tmpl b/pandasai/prompts/templates/generate_python_code.tmpl new file mode 100644 index 000000000..2c85a2270 --- /dev/null +++ b/pandasai/prompts/templates/generate_python_code.tmpl @@ -0,0 +1,29 @@ +{% for df in context.dfs %}{% set index = loop.index %}{% include 'shared/dataframe.tmpl' with context %}{% endfor %} + +{% if context.skills_manager.has_skills() %} +{{context.skills_manager.prompt_display()}} +{% endif %} +{% if last_code_generated != "" and context.memory.count() > 0 %} +{{ last_code_generated }} +{% else %} +Update this initial code: +```python +# TODO: import the required dependencies +import pandas as pd + +# Write code here + +# Declare result var: {% include 'shared/output_type_template.tmpl' with context %} +``` +{% endif %} +{% include 'shared/vectordb_docs.tmpl' with context %} +{{ context.memory.get_last_message() }} + +Variable `dfs: list[pd.DataFrame]` is already declared. + +At the end, declare "result" variable as a dictionary of type and value. +{% if viz_lib %} +If you are asked to plot a chart, use "{{viz_lib}}" for charts, save as png. 
+{% endif %} + +Generate python code and return full updated code: \ No newline at end of file diff --git a/pandasai/prompts/templates/generate_python_code_with_sql.tmpl b/pandasai/prompts/templates/generate_python_code_with_sql.tmpl new file mode 100644 index 000000000..00d26f908 --- /dev/null +++ b/pandasai/prompts/templates/generate_python_code_with_sql.tmpl @@ -0,0 +1,41 @@ + +{% for df in context.dfs %} +{% set index = loop.index %}{% include 'shared/dataframe.tmpl' with context %} +
{% endfor %} +
+ +{% if context.skills_manager.has_skills() %} +{{context.skills_manager.prompt_display()}} +{% else %} +You can call the following functions that have been pre-defined for you: +{% endif %} + +def execute_sql_query(sql_query: str) -> pd.Dataframe + """This method connects to the database, executes the sql query and returns the dataframe""" + + +{% if last_code_generated != "" and context.memory.count() > 0 %} +{{ last_code_generated }} +{% else %} +Update this initial code: +```python +# TODO: import the required dependencies +import pandas as pd + +# Write code here + +# Declare result var: {% include 'shared/output_type_template.tmpl' with context %} +``` +{% endif %} +{% include 'shared/vectordb_docs.tmpl' with context %} +{{ context.memory.get_last_message() }} +Variable `dfs: list[pd.DataFrame]` is already declared. + +At the end, declare "result" variable as a dictionary of type and value. +{% if viz_lib %} +If you are asked to plot a chart, use "{{viz_lib}}" for charts, save as png. 
+{% endif %} + +Generate python code and return full updated code: + +### Note: Use only relevant table for query and do aggregation, sorting, joins and grouby through sql query \ No newline at end of file diff --git a/pandasai/prompts/templates/generate_system_message.tmpl b/pandasai/prompts/templates/generate_system_message.tmpl new file mode 100644 index 000000000..b27f2011c --- /dev/null +++ b/pandasai/prompts/templates/generate_system_message.tmpl @@ -0,0 +1,5 @@ +{% if memory.agent_info %} {{memory.get_system_prompt()}} {% endif %} +{% if memory.count() > 1 %} +### PREVIOUS CONVERSATION +{{ memory.get_previous_conversation() }} +{% endif %} \ No newline at end of file diff --git a/pandasai/prompts/templates/rephrase_query.tmpl b/pandasai/prompts/templates/rephrase_query.tmpl new file mode 100644 index 000000000..6d06e70b5 --- /dev/null +++ b/pandasai/prompts/templates/rephrase_query.tmpl @@ -0,0 +1,9 @@ +{% for df in context.dfs %}{% set index = loop.index %}{% include 'shared/dataframe.tmpl' with context %}{% endfor %} + + +{{context.memory.get_conversation()}} + + +Use the provided dataframe and conversation to return the rephrased sentence of "{{query}}” in order to obtain more accurate and comprehensive responses without any explanations. 
+ +Rephrased query: \ No newline at end of file diff --git a/pandasai/prompts/templates/shared/dataframe.tmpl b/pandasai/prompts/templates/shared/dataframe.tmpl new file mode 100644 index 000000000..9a6c1b2df --- /dev/null +++ b/pandasai/prompts/templates/shared/dataframe.tmpl @@ -0,0 +1 @@ +{{ df.to_string(index-1, context.config.direct_sql, context.config.dataframe_serializer) }} \ No newline at end of file diff --git a/pandasai/prompts/templates/shared/output_type_template.tmpl b/pandasai/prompts/templates/shared/output_type_template.tmpl new file mode 100644 index 000000000..c792693c4 --- /dev/null +++ b/pandasai/prompts/templates/shared/output_type_template.tmpl @@ -0,0 +1,11 @@ +{% if not output_type %} +type (possible values "string", "number", "dataframe", "plot"). Examples: { "type": "string", "value": f"The highest salary is {highest_salary}." } or { "type": "number", "value": 125 } or { "type": "dataframe", "value": pd.DataFrame({...}) } or { "type": "plot", "value": "temp_chart.png" } +{% elif output_type == "number" %} +type (must be "number"), value must int. Example: { "type": "number", "value": 125 } +{% elif output_type == "string" %} +type (must be "string"), value must be string. Example: { "type": "string", "value": f"The highest salary is {highest_salary}." } +{% elif output_type == "dataframe" %} +type (must be "dataframe"), value must be pd.DataFrame or pd.Series. Example: { "type": "dataframe", "value": pd.DataFrame({...}) } +{% elif output_type == "plot" %} +type (must be "plot"), value must be string. 
Example: { "type": "plot", "value": "temp_chart.png" } +{% endif %} \ No newline at end of file diff --git a/pandasai/prompts/templates/shared/vectordb_docs.tmpl b/pandasai/prompts/templates/shared/vectordb_docs.tmpl new file mode 100644 index 000000000..ab12f70f4 --- /dev/null +++ b/pandasai/prompts/templates/shared/vectordb_docs.tmpl @@ -0,0 +1,8 @@ +{% if context.vectorstore %}{% set documents = context.vectorstore.get_relevant_qa_documents(context.memory.get_last_message()) %} +{% if documents|length > 0%}You can utilize these examples as a reference for generating code.{% endif %} +{% for document in documents %} +{{ document}}{% endfor %}{% endif %} +{% if context.vectorstore %}{% set documents = context.vectorstore.get_relevant_docs_documents(context.memory.get_last_message()) %} +{% if documents|length > 0%}Here are additional documents for reference. Feel free to use them to answer.{% endif %} +{% for document in documents %}{{ document}} +{% endfor %}{% endif %} \ No newline at end of file diff --git a/pandasai/responses/context.py b/pandasai/responses/context.py index 7f2e6a699..31718a60e 100644 --- a/pandasai/responses/context.py +++ b/pandasai/responses/context.py @@ -4,20 +4,16 @@ class Context: """ - Context class that contains context from SmartDataLake for ResponseParsers - Context contain the application config, logger and engine( pandas, polars etc). + Context class that contains context from Agent for ResponseParsers + Context contain the application config and logger. 
""" _config = None _logger = None - _engine: str = None - def __init__( - self, config: Config, logger: Logger = None, engine: str = None - ) -> None: + def __init__(self, config: Config, logger: Logger = None) -> None: self._config = config self._logger = logger - self._engine = engine @property def config(self): @@ -28,8 +24,3 @@ def config(self): def logger(self): """Getter for _logger attribute.""" return self._logger - - @property - def engine(self): - """Getter for _engine attribute.""" - return self._engine diff --git a/pandasai/responses/response_parser.py b/pandasai/responses/response_parser.py index dc229df14..e16e5b5f5 100644 --- a/pandasai/responses/response_parser.py +++ b/pandasai/responses/response_parser.py @@ -5,8 +5,6 @@ from pandasai.exceptions import MethodNotImplementedError -from ..helpers.df_info import polars_imported - class IResponseParser(ABC): @abstractmethod @@ -29,9 +27,9 @@ class ResponseParser(IResponseParser): def __init__(self, context) -> None: """ - Initialize the ResponseParser with Context from SmartDataLake + Initialize the ResponseParser with Context from Agent Args: - context (Context): context contains the config, logger and engine + context (Context): context contains the config and logger """ self._context = context @@ -51,34 +49,10 @@ def parse(self, result: dict) -> Any: ): raise ValueError("Unsupported result format") - if result["type"] == "dataframe": - return self.format_dataframe(result) - elif result["type"] == "plot": + if result["type"] == "plot": return self.format_plot(result) else: - return self.format_other(result) - - def format_dataframe(self, result: dict) -> Any: - """ - Format dataframe generate against a user query - Args: - result (dict): result contains type and value - Returns: - Any: Returns depending on the user input - """ - from ..smart_dataframe import SmartDataframe - - df = result["value"] - if self._context.engine == "polars" and polars_imported: - import polars as pl - - df = 
pl.from_pandas(df) - - return SmartDataframe( - df, - config=self._context._config.__dict__, - logger=self._context.logger, - ) + return result["value"] def format_plot(self, result: dict) -> Any: """ @@ -96,14 +70,3 @@ def format_plot(self, result: dict) -> Any: img.show() return result["value"] - - def format_other(self, result) -> Any: - """ - Returns the result generated against a user query other than dataframes - and plots - Args: - result (dict): result contains type and value - Returns: - Any: Returns depending on the user input - """ - return result["value"] diff --git a/pandasai/responses/response_serializer.py b/pandasai/responses/response_serializer.py new file mode 100644 index 000000000..bff4a6209 --- /dev/null +++ b/pandasai/responses/response_serializer.py @@ -0,0 +1,41 @@ +import base64 +import json + +import pandas as pd + +from pandasai.responses.response_type import ResponseType + + +class ResponseSerializer: + @staticmethod + def serialize_dataframe(df: pd.DataFrame): + json_data = json.loads(df.to_json(orient="split", date_format="iso")) + return {"headers": json_data["columns"], "rows": json_data["data"]} + + @staticmethod + def serialize(result: ResponseType) -> ResponseType: + """ + Format output response + Args: + result (ResponseType): response returned after execution + + Returns: + ResponseType: formatted response output + """ + if result["type"] == "dataframe": + df_dict = ResponseSerializer.serialize_dataframe(result["value"]) + return {"type": result["type"], "value": df_dict} + + elif result["type"] == "plot": + with open(result["value"], "rb") as image_file: + image_data = image_file.read() + # Encode the image data to Base64 + base64_image = ( + f"data:image/png;base64,{base64.b64encode(image_data).decode()}" + ) + return { + "type": result["type"], + "value": base64_image, + } + else: + return result diff --git a/pandasai/responses/response_type.py b/pandasai/responses/response_type.py new file mode 100644 index 
000000000..c3ab44e45 --- /dev/null +++ b/pandasai/responses/response_type.py @@ -0,0 +1,6 @@ +from typing import Any, TypedDict + + +class ResponseType(TypedDict): + type: str + value: Any diff --git a/pandasai/schemas/df_config.py b/pandasai/schemas/df_config.py index 42333e4cb..a5d683a32 100644 --- a/pandasai/schemas/df_config.py +++ b/pandasai/schemas/df_config.py @@ -1,10 +1,10 @@ -from typing import Any, Dict, List, Optional, TypedDict +from typing import Any, List, Optional, TypedDict from pandasai.constants import DEFAULT_CHART_DIRECTORY +from pandasai.helpers.dataframe_serializer import DataframeSerializerType from pandasai.pydantic import BaseModel, Field, validator from ..exceptions import LLMNotFoundError -from ..helpers.viz_library_types.base import VisualizationLibrary from ..llm import LLM, LangchainLLM @@ -19,8 +19,6 @@ class Config(BaseModel): enforce_privacy: bool = False enable_cache: bool = True use_error_correction_framework: bool = True - custom_prompts: Dict = Field(default_factory=dict) - custom_instructions: Optional[str] = None open_charts: bool = True save_charts: bool = False save_charts_path: str = DEFAULT_CHART_DIRECTORY @@ -29,9 +27,10 @@ class Config(BaseModel): lazy_load_connector: bool = True response_parser: Any = None llm: Any = None - data_viz_library: Optional[VisualizationLibrary] = None + data_viz_library: Optional[str] = "" log_server: LogServerConfig = None direct_sql: bool = False + dataframe_serializer: DataframeSerializerType = DataframeSerializerType.YML class Config: arbitrary_types_allowed = True diff --git a/pandasai/skills/__init__.py b/pandasai/skills/__init__.py index 59c0d1993..4b6d2f207 100644 --- a/pandasai/skills/__init__.py +++ b/pandasai/skills/__init__.py @@ -64,6 +64,9 @@ def from_function(cls, func: Callable, **kwargs: Any) -> "Skill": """ return cls(func=func, **kwargs) + def stringify(self): + return inspect.getsource(self.func) + def __str__(self): return f""" diff --git 
a/pandasai/smart_dataframe/__init__.py b/pandasai/smart_dataframe/__init__.py index b07b46097..de5aa0c30 100644 --- a/pandasai/smart_dataframe/__init__.py +++ b/pandasai/smart_dataframe/__init__.py @@ -17,223 +17,29 @@ # The average loan amount is $15,000. ``` """ - -import hashlib import uuid from functools import cached_property from io import StringIO from typing import Any, List, Optional, Union import pandasai.pandas as pd +from pandasai.agent.base import Agent +from pandasai.connectors.pandas import PandasConnector from pandasai.helpers.df_validator import DfValidator from pandasai.pydantic import BaseModel from ..connectors.base import BaseConnector -from ..helpers.data_sampler import DataSampler -from ..helpers.df_config_manager import DfConfigManager -from ..helpers.df_info import DataFrameType, df_type -from ..helpers.from_google_sheets import from_google_sheets +from ..helpers.df_info import DataFrameType from ..helpers.logger import Logger -from ..helpers.shortcuts import Shortcuts -from ..llm import LLM, LangchainLLM from ..schemas.df_config import Config from ..skills import Skill -from ..smart_datalake import SmartDatalake -from .abstract_df import DataframeAbstract - - -class SmartDataframeCore: - """ - A smart dataframe class is a wrapper around the pandas/polars dataframe that allows - you to query it using natural language. It uses the LLMs to generate Python code - from natural language and then executes it on the dataframe. - """ - - _df = None - _df_loaded: bool = True - _temporary_loaded: bool = False - _connector: BaseConnector = None - _engine: str = None - _logger: Logger = None - - def __init__(self, df: DataFrameType, logger: Logger = None): - self._logger = logger - self._load_dataframe(df) - - def _load_dataframe(self, df): - """ - Load the dataframe from a file or a connector. - - Args: - df (Union[pd.DataFrame, pl.DataFrame, BaseConnector]): - Pandas, Modin or Polars dataframe or a connector. 
- """ - if isinstance(df, BaseConnector): - self.dataframe = None - self.connector = df - self.connector.logger = self._logger - self._df_loaded = False - elif isinstance(df, str): - self.dataframe = self._import_from_file(df) - elif isinstance(df, pd.Series): - self.dataframe = df.to_frame() - elif isinstance(df, (list, dict)): - # if the list can be converted to a dataframe, convert it - # otherwise, raise an error - try: - self.dataframe = pd.DataFrame(df) - except ValueError as e: - raise ValueError( - "Invalid input data. We cannot convert it to a dataframe." - ) from e - else: - self.dataframe = df - - def _import_from_file(self, file_path: str): - """ - Import a dataframe from a file (csv, parquet, xlsx) - - Args: - file_path (str): Path to the file to be imported. - - Returns: - pd.DataFrame: Pandas or Modin dataframe - """ - - if file_path.endswith(".csv"): - return pd.read_csv(file_path) - elif file_path.endswith(".parquet"): - return pd.read_parquet(file_path) - elif file_path.endswith(".xlsx"): - return pd.read_excel(file_path) - elif file_path.startswith("https://docs.google.com/spreadsheets/"): - return from_google_sheets(file_path)[0] - else: - raise ValueError("Invalid file format.") - - def _load_engine(self): - """ - Load the engine of the dataframe (Pandas, Modin or Polars) - """ - df_engine = df_type(self._df) - - if df_engine is None: - raise ValueError( - "Invalid input data. Must be a Pandas, Modin or Polars dataframe." - ) - - if df_engine != "polars" and not isinstance(self._df, pd.DataFrame): - raise ValueError( - f"The provided dataframe is a {df_engine} dataframe, but the current pandasai engine is {pd.__name__}. " - f"To use {df_engine}, please run `pandasai.set_engine('{df_engine}')`. " - ) - - self._engine = df_engine - - def _validate_and_convert_dataframe(self, df: DataFrameType) -> DataFrameType: - """ - Validate the dataframe and convert it to a Pandas or Polars dataframe. 
- - Args: - df (DataFrameType): Pandas or Polars dataframe or path to a file - - Returns: - DataFrameType: Pandas or Polars dataframe - """ - if isinstance(df, str): - return self._import_from_file(df) - elif isinstance(df, (list, dict)): - # if the list or dictionary can be converted to a dataframe, convert it - # otherwise, raise an error - try: - return pd.DataFrame(df) - except ValueError as e: - raise ValueError( - "Invalid input data. We cannot convert it to a dataframe." - ) from e - else: - return df - def load_connector(self, temporary: bool = False): - """ - Load a connector into the smart dataframe - - Args: - temporary (bool): Whether the connector is for one time usage. - If `True` passed, the connector will be unbound during - the next call of `dataframe` providing that dataframe has - been loaded. - """ - self.dataframe = self.connector.execute() - self._df_loaded = True - self._temporary_loaded = temporary - - def _unload_connector(self): - """ - Unload the connector from the smart dataframe. - This is done when a partial dataframe is loaded from a connector (i.e. - because of a filter) and we want to load the full dataframe or a different - partial dataframe. 
- """ - self._df = None - self._df_loaded = False - self._temporary_loaded = False - - @property - def dataframe(self) -> DataFrameType: - if self._df_loaded: - return_df = None - - if self._engine == "polars": - return_df = self._df.clone() - elif self._engine in ("pandas", "modin"): - return_df = self._df.copy() - - if self.has_connector and self._df_loaded and self._temporary_loaded: - self._unload_connector() - - return return_df - elif self.has_connector: - return None - - @dataframe.setter - def dataframe(self, df: DataFrameType): - """ - Load a dataframe into the smart dataframe - - Args: - df (DataFrameType): Pandas or Polars dataframe or path to a file - """ - df = self._validate_and_convert_dataframe(df) - self._df = df - - if df is not None: - self._load_engine() - - @property - def engine(self) -> str: - return self._engine - - @property - def connector(self): - return self._connector - - @connector.setter - def connector(self, connector: BaseConnector): - self._connector = connector - @property - def has_connector(self): - return self._connector is not None - - -class SmartDataframe(DataframeAbstract, Shortcuts): +class SmartDataframe: _table_name: str _table_description: str _custom_head: str = None _original_import: any - _core: SmartDataframeCore - _lake: SmartDatalake def __init__( self, @@ -255,49 +61,52 @@ def __init__( """ self._original_import = df - if ( - isinstance(df, str) - and not df.endswith(".csv") - and not df.endswith(".parquet") - and not df.endswith(".xlsx") - and not df.startswith("https://docs.google.com/spreadsheets/") - ): - if not (df_config := self._load_from_config(df)): - raise ValueError( - "Could not find a saved dataframe configuration " - "with the given name." 
- ) - - if "://" in df_config["import_path"]: - df = self._instantiate_connector(df_config["import_path"]) - else: - df = df_config["import_path"] + self._agent = Agent([df], config=config) - if name is None: - name = df_config["name"] - if description is None: - description = df_config["description"] - self._core = SmartDataframeCore(df, logger) + self.dataframe = self._agent.context.dfs[0] self._table_description = description self._table_name = name - self._lake = SmartDatalake([self], config, logger) - - # set instance type in SmartDataLake - self._lake.set_instance_type(self.__class__.__name__) - - # If no name is provided, use the fallback name provided the connector - if self.connector: - self._table_name = self.connector.fallback_name if custom_head is not None: self._custom_head = custom_head.to_csv(index=False) + def load_dfs(self, df, name: str, description: str, custom_head: pd.DataFrame): + if isinstance(df, (pd.DataFrame, pd.Series, list, dict, str)): + df = PandasConnector( + {"original_df": df}, + name=name, + description=description, + custom_head=custom_head, + ) + else: + try: + import polars as pl + + if isinstance(df, pl.DataFrame): + from ..connectors.polars import PolarsConnector + + df = PolarsConnector( + {"original_df": df}, + name=name, + description=description, + custom_head=custom_head, + ) + else: + raise ValueError( + "Invalid input data. We cannot convert it to a dataframe." + ) + except ImportError as e: + raise ValueError( + "Invalid input data. We cannot convert it to a dataframe." 
+ ) from e + return df + def add_skills(self, *skills: Skill): """ Add Skills to PandasAI """ - self.lake.add_skills(*skills) + self._agent.add_skills(*skills) def chat(self, query: str, output_type: Optional[str] = None): """ @@ -320,152 +129,7 @@ def chat(self, query: str, output_type: Optional[str] = None): Raises: ValueError: If the query is empty """ - return self.lake.chat(query, output_type) - - def column_hash(self) -> str: - """ - Get the hash of the columns of the dataframe. - - Returns: - str: Hash of the columns of the dataframe - """ - if not self._core._df_loaded and self.connector: - return self.connector.column_hash - - columns_str = "".join(self.dataframe.columns) - hash_object = hashlib.sha256(columns_str.encode()) - return hash_object.hexdigest() - - def save(self, name: str = None): - """ - Saves the dataframe configuration to be used for later - - Args: - name (str, optional): Name of the dataframe configuration. Defaults to None. - """ - - config_manager = DfConfigManager(self) - config_manager.save(name) - - def load_connector(self, temporary: bool = False): - """ - Load a connector into the smart dataframe - - Args: - temporary (bool, optional): Whether the connector is temporary or not. - Defaults to False. 
- """ - self._core.load_connector(temporary) - - def _instantiate_connector(self, import_path: str) -> BaseConnector: - connector_name = import_path.split("://")[0] - connector_path = import_path.split("://")[1] - connector_host = connector_path.split(":")[0] - connector_port = connector_path.split(":")[1].split("/")[0] - connector_database = connector_path.split(":")[1].split("/")[1] - connector_table = connector_path.split(":")[1].split("/")[2] - - connector_data = { - "host": connector_host, - "database": connector_database, - "table": connector_table, - } - if connector_port: - connector_data["port"] = connector_port - - # instantiate the connector - return getattr( - __import__("pandasai.connectors", fromlist=[connector_name]), - connector_name, - )(config=connector_data) - - def _truncate_head_columns(self, df: DataFrameType, max_size=25) -> DataFrameType: - """ - Truncate the columns of the dataframe to a maximum of 20 characters. - - Args: - df (DataFrameType): Pandas, Modin or Polars dataframe - - Returns: - DataFrameType: Pandas, Modin or Polars dataframe - """ - if (engine := df_type(df)) in ("pandas", "modin"): - df_trunc = df.copy() - - for col in df.columns: - if df[col].dtype == "object": - first_val = df[col].iloc[0] - if isinstance(first_val, str) and len(first_val) > max_size: - df_trunc[col] = f"{df_trunc[col].str.slice(0, max_size - 3)}..." - elif engine == "polars": - try: - import polars as pl - - df_trunc = df.clone() - - for col in df.columns: - if df[col].dtype == pl.Utf8: - first_val = df[col][0] - if isinstance(first_val, str) and len(df_trunc[col]) > max_size: - df_trunc[ - col - ] = f"{df_trunc[col].str.slice(0, max_size - 3)}..." - except ImportError as e: - raise ImportError( - "Polars is not installed. " - "Please install Polars to use this feature." - ) from e - else: - raise ValueError( - f"Unrecognized engine {engine}. It must either be pandas, modin or polars." 
- ) - - return df_trunc - - def _get_sample_head(self) -> DataFrameType: - head = None - rows_to_display = 0 if self.lake.config.enforce_privacy else 3 - if self._custom_head is not None: - head = self.custom_head - elif not self._core._df_loaded and self.connector: - head = self.connector.head() - else: - head = self.dataframe.head(rows_to_display) - - if head is None: - return None - - sampler = DataSampler(head) - sampled_head = sampler.sample(rows_to_display) - if self.lake.config.enforce_privacy: - return sampled_head - else: - return self._truncate_head_columns(sampled_head) - - def _load_from_config(self, name: str): - """ - Loads a saved dataframe configuration - """ - - config_manager = DfConfigManager(self) - return config_manager.load(name) - - @property - def dataframe(self) -> DataFrameType: - return self._core.dataframe - - @property - def engine(self): - return self._core.engine - - @property - def connector(self): - return self._core.connector - - @connector.setter - def connector(self, connector: BaseConnector): - connector.logger = self.logger - self._core.connector = connector + return self._agent.chat(query, output_type) def validate(self, schema: BaseModel): """ @@ -477,36 +141,6 @@ def validate(self, schema: BaseModel): df_validator = DfValidator(self.dataframe) return df_validator.validate(schema) - @property - def lake(self) -> SmartDatalake: - return self._lake - - @lake.setter - def lake(self, lake: SmartDatalake): - self._lake = lake - - @property - def rows_count(self): - if self._core._df_loaded: - return self.dataframe.shape[0] - elif self.connector is not None: - return self.connector.rows_count - else: - raise ValueError( - "Cannot determine rows_count. No dataframe or connector loaded." - ) - - @property - def columns_count(self): - if self._core._df_loaded: - return self.dataframe.shape[1] - elif self.connector is not None: - return self.connector.columns_count - else: - raise ValueError( - "Cannot determine columns_count. 
No dataframe or connector loaded." - ) - @cached_property def head_df(self): """ @@ -515,7 +149,7 @@ def head_df(self): Returns: DataFrameType: Pandas, Modin or Polars dataframe """ - return self._get_sample_head() + return self.dataframe.get_head() @cached_property def head_csv(self): @@ -525,141 +159,87 @@ def head_csv(self): Returns: str: CSV string """ - df_head = self._get_sample_head() + df_head = self.dataframe.get_head() return df_head.to_csv(index=False) @property def last_prompt(self): - return self.lake.last_prompt + return self._agent.last_prompt @property def last_prompt_id(self) -> uuid.UUID: - return self.lake.last_prompt_id + return self._agent.last_prompt_id @property def last_code_generated(self): - return self.lake.last_code_executed + return self._agent.last_code_executed @property def last_code_executed(self): - return self.lake.last_code_executed - - @property - def last_result(self): - return self.lake.last_result - - @property - def last_error(self): - return self.lake.last_error - - @property - def cache(self): - return self.lake.cache + return self._agent.last_code_executed def original_import(self): return self._original_import @property def logger(self): - return self.lake.logger + return self._agent.logger @logger.setter def logger(self, logger: Logger): - self.lake.logger = logger + self._agent.logger = logger @property def logs(self): - return self.lake.logs + return self._agent.context.config.logs @property def verbose(self): - return self.lake.verbose + return self._agent.context.config.verbose @verbose.setter def verbose(self, verbose: bool): - self.lake.verbose = verbose + self._agent.context.config.verbose = verbose @property def save_logs(self): - return self.lake.save_logs + return self._agent.context.config.save_logs @save_logs.setter def save_logs(self, save_logs: bool): - self.lake.save_logs = save_logs + self._agent.context.config.save_logs = save_logs @property def enforce_privacy(self): - return self.lake.enforce_privacy 
+ return self._agent.context.config.enforce_privacy @enforce_privacy.setter def enforce_privacy(self, enforce_privacy: bool): - self.lake.enforce_privacy = enforce_privacy + self._agent.context.config.enforce_privacy = enforce_privacy @property def enable_cache(self): - return self.lake.enable_cache + return self._agent.context.config.enable_cache @enable_cache.setter def enable_cache(self, enable_cache: bool): - self.lake.enable_cache = enable_cache - - @property - def use_error_correction_framework(self): - return self.lake.use_error_correction_framework - - @use_error_correction_framework.setter - def use_error_correction_framework(self, use_error_correction_framework: bool): - self.lake.use_error_correction_framework = use_error_correction_framework - - @property - def custom_prompts(self): - return self.lake.custom_prompts - - @custom_prompts.setter - def custom_prompts(self, custom_prompts: dict): - self.lake.custom_prompts = custom_prompts + self._agent.context.config.enable_cache = enable_cache @property def save_charts(self): - return self.lake.save_charts + return self._agent.context.config.save_charts @save_charts.setter def save_charts(self, save_charts: bool): - self.lake.save_charts = save_charts + self._agent.context.config.save_charts = save_charts @property def save_charts_path(self): - return self.lake.save_charts_path + return self._agent.context.config.save_charts_path @save_charts_path.setter def save_charts_path(self, save_charts_path: str): - self.lake.save_charts_path = save_charts_path - - @property - def custom_whitelisted_dependencies(self): - return self.lake.custom_whitelisted_dependencies - - @custom_whitelisted_dependencies.setter - def custom_whitelisted_dependencies( - self, custom_whitelisted_dependencies: List[str] - ): - self.lake.custom_whitelisted_dependencies = custom_whitelisted_dependencies - - @property - def max_retries(self): - return self.lake.max_retries - - @max_retries.setter - def max_retries(self, max_retries: int): 
- self.lake.max_retries = max_retries - - @property - def llm(self): - return self.lake.llm - - @llm.setter - def llm(self, llm: Union[LLM, LangchainLLM]): - self.lake.llm = llm + self._agent.context.config.save_charts_path = save_charts_path @property def table_name(self): @@ -674,13 +254,15 @@ def custom_head(self): data = StringIO(self._custom_head) return pd.read_csv(data) - @custom_head.setter - def custom_head(self, custom_head: pd.DataFrame): - self._custom_head = custom_head.to_csv(index=False) - @property def last_query_log_id(self): - return self._lake.last_query_log_id + return self._agent.last_query_log_id + + def __len__(self): + return len(self.dataframe) + + def __eq__(self, other): + return self.dataframe.equals(other.dataframe) def __getattr__(self, name): if name in self._core.__dir__(): @@ -696,29 +278,6 @@ def __getitem__(self, key): def __setitem__(self, key, value): return self.dataframe.__setitem__(key, value) - def __dir__(self): - return dir(self._core) + dir(self.dataframe) + dir(self.__class__) - - def __repr__(self): - return self.dataframe.__repr__() - - def __len__(self): - return len(self.dataframe) - - def __eq__(self, other): - if isinstance(other, self.__class__) and ( - self._core.has_connector and other._core.has_connector - ): - return self._core.connector.equals(other._core.connector) - - return False - - def is_connector(self): - return self._core.has_connector - - def get_query_exec_func(self): - return self._core.connector.execute_direct_sql_query - def load_smartdataframes( dfs: List[Union[DataFrameType, Any]], config: Config diff --git a/pandasai/smart_dataframe/abstract_df.py b/pandasai/smart_dataframe/abstract_df.py deleted file mode 100644 index e85244917..000000000 --- a/pandasai/smart_dataframe/abstract_df.py +++ /dev/null @@ -1,338 +0,0 @@ -from abc import ABC - - -class DataframeAbstract(ABC): - _engine: str - - @property - def dataframe(self): - raise NotImplementedError("This method must be implemented in the 
child class") - - # Columns - @property - def columns(self) -> list: - return self.dataframe.columns - - def rename(self, columns): - """ - A proxy-call to the dataframe's `.rename()`. - """ - return self.dataframe.rename(columns=columns) - - # Index - @property - def index(self): - return self.dataframe.index - - def set_index(self, keys): - """ - A proxy-call to the dataframe's `.set_index()`. - """ - return self.dataframe.set_index(keys=keys) - - def reset_index(self, drop=False): - """ - A proxy-call to the dataframe's `.reset_index()`. - """ - return self.dataframe.reset_index(drop=drop) - - # Data - def head(self, n): - """ - A proxy-call to the dataframe's `.head()`. - """ - return self.dataframe.head(n=n) - - def tail(self, n): - """ - A proxy-call to the dataframe's `.tail()`. - """ - return self.dataframe.tail(n=n) - - def sample(self, n): - """ - A proxy-call to the dataframe's `.sample()`. - """ - return self.dataframe.sample(n=n) - - def describe(self): - """ - A proxy-call to the dataframe's `.describe()`. - """ - return self.dataframe.describe() - - # Missing data - def isna(self): - """ - A proxy-call to the dataframe's `.isna()`. - """ - return self.dataframe.isna() - - def notna(self): - """ - A proxy-call to the dataframe's `.notna()`. - """ - return self.dataframe.notna() - - def dropna(self, axis): - """ - A proxy-call to the dataframe's `.dropna()`. - """ - return self.dataframe.dropna(axis=axis) - - def fillna(self, value): - """ - A proxy-call to the dataframe's `.fillna()`. - """ - return self.dataframe.fillna(value=value) - - # Duplicates - def duplicated(self): - """ - A proxy-call to the dataframe's `.duplicated()`. - """ - return self.dataframe.duplicated() - - def drop_duplicates(self, subset): - """ - A proxy-call to the dataframe's `.drop_duplicates()`. - """ - return self.dataframe.drop_duplicates(subset=subset) - - # Transform - def apply(self, func): - """ - A proxy-call to the dataframe's `.apply()`. 
- """ - return self.dataframe.apply(func=func) - - def applymap(self, func): - """ - A proxy-call to the dataframe's `.applymap()`. - """ - return self.dataframe.applymap(func=func) - - def pipe(self, func): - """ - A proxy-call to the dataframe's `.pipe()`. - """ - return self.dataframe.pipe(func=func) - - # Groupby - def groupby(self, by): - """ - A proxy-call to the dataframe's `.groupby()`. - """ - return self.dataframe.groupby(by=by) - - def pivot(self, index, columns, values): - """ - A proxy-call to the dataframe's `.pivot()`. - """ - return self.dataframe.pivot(index=index, columns=columns, values=values) - - def unstack(self): - """ - A proxy-call to the dataframe's `.unstack()`. - """ - return self.dataframe.unstack() - - # Join/Merge - def append(self, other): - """ - A proxy-call to the dataframe's `.append()`. - """ - return self.dataframe.append(other=other) - - def join(self, other): - """ - A proxy-call to the dataframe's `.join()`. - """ - return self.dataframe.join(other=other) - - def merge(self, other): - """ - A proxy-call to the dataframe's `.merge()`. - """ - return self.dataframe.merge(other=other) - - # Combine - def concat(self, others): - """ - A proxy-call to the dataframe's `.concat()`. - """ - return self.dataframe.concat(others=others) - - # Statistical - def count(self): - """ - A proxy-call to the dataframe's `.count()`. - """ - return self.dataframe.count() - - def mean(self): - """ - A proxy-call to the dataframe's `.mean()`. - """ - return self.dataframe.mean() - - def median(self): - """ - A proxy-call to the dataframe's `.median()`. - """ - return self.dataframe.median() - - def std(self): - """ - A proxy-call to the dataframe's `.std()`. - """ - return self.dataframe.std() - - def min(self): - """ - A proxy-call to the dataframe's `.min()`. - """ - return self.dataframe.min() - - def max(self): - """ - A proxy-call to the dataframe's `.max()`. 
- """ - return self.dataframe.max() - - def abs(self): - """ - A proxy-call to the dataframe's `.abs()`. - """ - return self.dataframe.abs() - - def prod(self): - """ - A proxy-call to the dataframe's `.prod()`. - """ - return self.dataframe.prod() - - def sum(self): - """ - A proxy-call to the dataframe's `.sum()`. - """ - return self.dataframe.sum() - - def nunique(self): - """ - A proxy-call to the dataframe's `.nunique()`. - """ - return self.dataframe.nunique() - - def value_counts(self): - """ - A proxy-call to the dataframe's `.value_counts()`. - """ - return self.dataframe.value_counts() - - def corr(self): - """ - A proxy-call to the dataframe's `.corr()`. - """ - return self.dataframe.corr() - - def cov(self): - """ - A proxy-call to the dataframe's `.cov()`. - """ - return self.dataframe.cov() - - # Window - def rolling(self, window): - """ - A proxy-call to the dataframe's `.window()`. - """ - return self.dataframe.rolling(window=window) - - def expanding(self, min_periods): - """ - A proxy-call to the dataframe's `.expanding()`. - """ - return self.dataframe.expanding(min_periods=min_periods) - - def resample(self, rule): - """ - A proxy-call to the dataframe's `.resample()`. - """ - return self.dataframe.resample(rule=rule) - - # Plotting - def plot(self): - """ - A proxy-call to the dataframe's `.plot()`. - """ - return self.dataframe.plot() - - def hist(self): - """ - A proxy-call to the dataframe's `.hist()`. - """ - return self.dataframe.hist() - - # Exporting - def to_csv(self, path): - """ - A proxy-call to the dataframe's `.to_csv()`. - """ - return self.dataframe.to_csv(path_or_buf=path) - - def to_json(self, path): - """ - A proxy-call to the dataframe's `.to_json()`. - """ - return self.dataframe.to_json(path=path) - - def to_sql(self, name, con): - """ - A proxy-call to the dataframe's `.to_sql()`. 
- """ - return self.dataframe.to_sql(name=name, con=con) - - def to_dict(self, orient="dict", into=dict, as_series=True): - """ - A proxy-call to the dataframe's `.to_dict()`. - """ - if self._engine in ("pandas", "modin"): - return self.dataframe.to_dict(orient=orient, into=into) - elif self._engine == "polars": - return self.dataframe.to_dict(as_series=as_series) - raise RuntimeError( - f"{self.__class__} object has unknown engine type. " - f"Possible engines: 'pandas', 'polars'. Actual '{self._engine}'." - ) - - def to_numpy(self): - """ - A proxy-call to the dataframe's `.to_numpy()`. - """ - return self.dataframe.to_numpy() - - def to_markdown(self): - """ - A proxy-call to the dataframe's `.to_markdown()`. - """ - return self.dataframe.to_markdown() - - def to_parquet(self): - """ - A proxy-call to the dataframe's `.to_parquet()`. - """ - return self.dataframe.to_parquet() - - # Query - def query(self, expr): - """ - A proxy-call to the dataframe's `.query()`. - """ - return self.dataframe.query(expr=expr) - - def filter(self, expr): - """ - A proxy-call to the dataframe's `.filter()`. - """ - return self.dataframe.filter(items=expr) diff --git a/pandasai/smart_datalake/__init__.py b/pandasai/smart_datalake/__init__.py index 5ce32e7fe..67d6caa95 100644 --- a/pandasai/smart_datalake/__init__.py +++ b/pandasai/smart_datalake/__init__.py @@ -17,68 +17,22 @@ # The average loan amount is $15,000. 
``` """ -import logging -import os -import threading import uuid from typing import Any, List, Optional, Union -from pandasai.constants import DEFAULT_CHART_DIRECTORY, DEFAULT_FILE_PERMISSIONS -from pandasai.helpers.output_types import output_type_factory -from pandasai.helpers.query_exec_tracker import QueryExecTracker -from pandasai.helpers.skills_manager import SkillsManager -from pandasai.helpers.viz_library_types import viz_lib_type_factory -from pandasai.pipelines.pipeline_context import PipelineContext -from pandasai.responses.context import Context -from pandasai.responses.response_parser import ResponseParser +from pandasai.agent.base import Agent from pandasai.skills import Skill -from ..config import load_config from ..helpers.cache import Cache -from ..helpers.code_manager import CodeManager from ..helpers.df_info import DataFrameType -from ..helpers.logger import Logger -from ..helpers.memory import Memory -from ..helpers.path import find_project_root -from ..helpers.viz_library_types.base import VisualizationLibrary -from ..llm.base import LLM -from ..llm.langchain import LangchainLLM -from ..pipelines.smart_datalake_chat.generate_smart_datalake_pipeline import ( - GenerateSmartDatalakePipeline, -) -from ..prompts.base import AbstractPrompt -from ..prompts.correct_error_prompt import CorrectErrorPrompt from ..schemas.df_config import Config class SmartDatalake: - _dfs: List[DataFrameType] - _config: Union[Config, dict] - _llm: LLM - _cache: Cache = None - _logger: Logger - _last_prompt_id: uuid.UUID - _conversation_id: uuid.UUID - _code_manager: CodeManager - _memory: Memory - _skills: SkillsManager - _instance: str - _query_exec_tracker: QueryExecTracker - - _last_code_generated: str = None - _last_result: str = None - _last_error: str = None - - _viz_lib: str = None - _lock: threading.RLock = threading.RLock() - def __init__( self, dfs: List[Union[DataFrameType, Any]], config: Optional[Union[Config, dict]] = None, - logger: Optional[Logger] = None, 
- memory: Optional[Memory] = None, - cache: Optional[Cache] = None, ): """ Args: @@ -86,227 +40,13 @@ def __init__( config (Union[Config, dict], optional): Config to be used. Defaults to None. logger (Logger, optional): Logger to be used. Defaults to None. """ - - self._load_config(config) - - self.initialize() - - if logger: - self.logger = logger - else: - self.logger = Logger( - save_logs=self._config.save_logs, verbose=self._config.verbose - ) - - self._load_dfs(dfs) - - self._memory = memory or Memory() - self._code_manager = CodeManager( - dfs=self._dfs, - config=self._config, - logger=self.logger, - ) - - self._skills = SkillsManager() - - if cache: - self._cache = cache - elif self._config.enable_cache: - self._cache = Cache() - - context = Context(self._config, self.logger, self.engine) - - if self._config.response_parser: - self._response_parser = self._config.response_parser(context) - else: - self._response_parser = ResponseParser(context) - - if self._config.data_viz_library: - self._viz_lib = self._config.data_viz_library.value - - self._conversation_id = uuid.uuid4() - - self._instance = self.__class__.__name__ - - self._query_exec_tracker = QueryExecTracker( - server_config=self._config.log_server, - ) - - def set_instance_type(self, type_: str): - self._instance = type_ - - def is_related_query(self, flag: bool): - self._query_exec_tracker.set_related_query(flag) - - def initialize(self): - """Initialize the SmartDatalake, create auxiliary directories. - - If 'save_charts' option is enabled, create '.exports/charts directory' - in case if it doesn't exist. - If 'enable_cache' option is enabled, Create './cache' in case if it - doesn't exist. 
- - Returns: - None - """ - - if self._config.save_charts: - charts_dir = self._config.save_charts_path - - # Add project root path if save_charts_path is default - if self._config.save_charts_path == DEFAULT_CHART_DIRECTORY: - try: - charts_dir = os.path.join( - (find_project_root()), self._config.save_charts_path - ) - self._config.save_charts_path = charts_dir - except ValueError: - charts_dir = os.path.join( - os.getcwd(), self._config.save_charts_path - ) - with self._lock: - os.makedirs(charts_dir, mode=DEFAULT_FILE_PERMISSIONS, exist_ok=True) - - if self._config.enable_cache: - try: - cache_dir = os.path.join((find_project_root()), "cache") - except ValueError: - cache_dir = os.path.join(os.getcwd(), "cache") - os.makedirs(cache_dir, mode=DEFAULT_FILE_PERMISSIONS, exist_ok=True) - - def _load_dfs(self, dfs: List[Union[DataFrameType, Any]]): - """ - Load all the dataframes to be used in the smart datalake. - - Args: - dfs (List[Union[DataFrameType, Any]]): List of dataframes to be used - """ - - from ..smart_dataframe import SmartDataframe - - smart_dfs = [] - for df in dfs: - if not isinstance(df, SmartDataframe): - smart_dfs.append( - SmartDataframe(df, config=self._config, logger=self.logger) - ) - else: - smart_dfs.append(df) - self._dfs = smart_dfs - - def _load_config(self, config: Union[Config, dict]): - """ - Load a config to be used to run the queries. - - Args: - config (Union[Config, dict]): Config to be used - """ - - config = load_config(config) - - if config.get("llm"): - self._load_llm(config["llm"]) - config["llm"] = self._llm - - if config.get("data_viz_library"): - self._load_data_viz_library(config["data_viz_library"]) - config["data_viz_library"] = self._data_viz_library - - self._config = Config(**config) - - def _load_llm(self, llm: LLM): - """ - Load a LLM to be used to run the queries. - Check if it is a PandasAI LLM or a Langchain LLM. - If it is a Langchain LLM, wrap it in a PandasAI LLM. 
- - Args: - llm (object): LLMs option to be used for API access - - Raises: - BadImportError: If the LLM is a Langchain LLM but the langchain package - is not installed - """ - if hasattr(llm, "_llm_type"): - llm = LangchainLLM(llm) - - self._llm = llm - - def _load_data_viz_library(self, data_viz_library: str): - """ - Load the appropriate instance for viz library type to use. - - Args: - data_viz_library (enum): TODO - - Raises: - TODO - """ - - self._data_viz_library = VisualizationLibrary.DEFAULT.value - if data_viz_library in (item.value for item in VisualizationLibrary): - self._data_viz_library = data_viz_library + self._agent = Agent(dfs, config=config) def add_skills(self, *skills: Skill): """ Add Skills to PandasAI """ - self._skills.add_skills(*skills) - - def _assign_prompt_id(self): - """Assign a prompt ID""" - - self._last_prompt_id = uuid.uuid4() - - if self.logger: - self.logger.log(f"Prompt ID: {self._last_prompt_id}") - - def _get_prompt( - self, - key: str, - default_prompt: AbstractPrompt, - default_values: Optional[dict] = None, - ) -> AbstractPrompt: - """ - Return a prompt by key. - - Args: - key (str): The key of the prompt - default_prompt (Type[AbstractPrompt]): The default prompt to use - default_values (Optional[dict], optional): The default values to use for the - prompt. Defaults to None. 
- - Returns: - AbstractPrompt: The prompt - """ - if default_values is None: - default_values = {} - - custom_prompt = self._config.custom_prompts.get(key) - prompt = custom_prompt or default_prompt - - # set default values for the prompt - prompt.set_config(self._config) - if "dfs" not in default_values: - prompt.set_var("dfs", self._dfs) - if "conversation" not in default_values: - prompt.set_var("conversation", self._memory.get_conversation()) - if "prev_conversation" not in default_values: - prompt.set_var( - "prev_conversation", self._memory.get_previous_conversation() - ) - if "last_message" not in default_values: - prompt.set_var("last_message", self._memory.get_last_message()) - - # Adds the skills to prompt if exist else display nothing - skills_prompt = self._skills.prompt_display() - prompt.set_var("skills", skills_prompt if skills_prompt is not None else "") - - for key, value in default_values.items(): - prompt.set_var(key, value) - - self.logger.log(f"Using prompt: {prompt}") - return prompt + self._agent.add_skills(*skills) def chat(self, query: str, output_type: Optional[str] = None): """ @@ -331,274 +71,78 @@ def chat(self, query: str, output_type: Optional[str] = None): Raises: ValueError: If the query is empty """ - - pipeline_context = self.prepare_context_for_smart_datalake_pipeline( - query=query, output_type=output_type - ) - - try: - result = GenerateSmartDatalakePipeline(pipeline_context, self.logger).run() - except Exception as exception: - self.last_error = str(exception) - self._query_exec_tracker.success = False - self._query_exec_tracker.publish() - - return ( - "Unfortunately, I was not able to answer your question, " - "because of the following error:\n" - f"\n{exception}\n" - ) - - self.update_intermediate_value_post_pipeline_execution(pipeline_context) - - # publish query tracker - self._query_exec_tracker.publish() - - return result - - def _validate_output(self, result: dict, output_type: Optional[str] = None): - """ - Validate 
the output of the code execution. - - Args: - result (Any): Result of executing the code - output_type (Optional[str]): Add a hint for LLM which - type should be returned by `analyze_data()` in generated - code. Possible values: "number", "dataframe", "plot", "string": - * number - specifies that user expects to get a number - as a response object - * dataframe - specifies that user expects to get - pandas/polars dataframe as a response object - * plot - specifies that user expects LLM to build - a plot - * string - specifies that user expects to get text - as a response object - If none `output_type` is specified, the type can be any - of the above or "text". - - Raises: - (ValueError): If the output is not valid - """ - - output_type_helper = output_type_factory(output_type, logger=self.logger) - result_is_valid, validation_logs = output_type_helper.validate(result) - - if result_is_valid: - self._query_exec_tracker.add_step( - { - "type": "Validating Output", - "success": True, - "message": "Output Validation Successful", - } - ) - else: - self.logger.log("\n".join(validation_logs), level=logging.WARNING) - self._query_exec_tracker.add_step( - { - "type": "Validating Output", - "success": False, - "message": "Output Validation Failed", - } - ) - raise ValueError("Output validation failed") - - def _get_viz_library_type(self) -> str: - """ - Get the visualization library type based on the configured library. - - Returns: - (str): Visualization library type - """ - - viz_lib_helper = viz_lib_type_factory(self._viz_lib, logger=self.logger) - return viz_lib_helper.template_hint - - def prepare_context_for_smart_datalake_pipeline( - self, query: str, output_type: Optional[str] = None - ) -> PipelineContext: - """ - Prepare Pipeline Context to initiate Smart Data Lake Pipeline. - - Args: - query (str): Query to run on the dataframe - output_type (Optional[str]): Add a hint for LLM which - type should be returned by `analyze_data()` in generated - code. 
Possible values: "number", "dataframe", "plot", "string": - * number - specifies that user expects to get a number - as a response object - * dataframe - specifies that user expects to get - pandas/polars dataframe as a response object - * plot - specifies that user expects LLM to build - a plot - * string - specifies that user expects to get text - as a response object - If none `output_type` is specified, the type can be any - of the above or "text". - - Returns: - PipelineContext: The Pipeline Context to be used by Smart Data Lake Pipeline. - """ - - self._query_exec_tracker.start_new_track() - - self.logger.log(f"Question: {query}") - self.logger.log(f"Running PandasAI with {self._llm.type} LLM...") - - self._assign_prompt_id() - - self._query_exec_tracker.add_query_info( - self._conversation_id, self._instance, query, output_type - ) - - self._query_exec_tracker.add_dataframes(self._dfs) - - self._memory.add(query, True) - - output_type_helper = output_type_factory(output_type, logger=self.logger) - viz_lib_helper = viz_lib_type_factory(self._viz_lib, logger=self.logger) - - pipeline_context = PipelineContext( - dfs=self.dfs, - config=self.config, - memory=self.memory, - cache=self.cache, - query_exec_tracker=self._query_exec_tracker, - ) - pipeline_context.add_intermediate_value("is_present_in_cache", False) - pipeline_context.add_intermediate_value( - "output_type_helper", output_type_helper - ) - pipeline_context.add_intermediate_value("viz_lib_helper", viz_lib_helper) - pipeline_context.add_intermediate_value( - "last_code_generated", self._last_code_generated - ) - pipeline_context.add_intermediate_value("get_prompt", self._get_prompt) - pipeline_context.add_intermediate_value("last_prompt_id", self.last_prompt_id) - pipeline_context.add_intermediate_value("skills", self._skills) - pipeline_context.add_intermediate_value("code_manager", self._code_manager) - pipeline_context.add_intermediate_value( - "response_parser", self._response_parser - ) - - return 
pipeline_context - - def update_intermediate_value_post_pipeline_execution( - self, pipeline_context: PipelineContext - ): - """ - After the Smart Data Lake Pipeline has executed, update values of Smart Data Lake object. - - Args: - pipeline_context (PipelineContext): Pipeline Context after the Smart Data Lake pipeline execution - - """ - self._last_code_generated = pipeline_context.get_intermediate_value( - "last_code_generated" - ) - self._last_result = pipeline_context.get_intermediate_value("last_result") - - def _retry_run_code(self, code: str, e: Exception) -> List: - """ - A method to retry the code execution with error correction framework. - - Args: - code (str): A python code - e (Exception): An exception - dataframes - - Returns (str): A python code - """ - - self.logger.log(f"Failed with error: {e}. Retrying", logging.ERROR) - - default_values = { - "engine": self._dfs[0].engine, - "code": code, - "error_returned": e, - } - error_correcting_instruction = self._get_prompt( - "correct_error", - default_prompt=CorrectErrorPrompt(), - default_values=default_values, - ) - - return self._llm.generate_code(error_correcting_instruction) + return self._agent.chat(query, output_type) def clear_memory(self): """ Clears the memory """ - self._memory.clear() - self._conversation_id = uuid.uuid4() - - @property - def engine(self): - return self._dfs[0].engine + self._agent.clear_memory() @property def last_prompt(self): - return self._llm.last_prompt + return self._agent.last_prompt @property def last_prompt_id(self) -> uuid.UUID: """Return the id of the last prompt that was run.""" - if self._last_prompt_id is None: + if self._agent.last_prompt_id is None: raise ValueError("Pandas AI has not been run yet.") - return self._last_prompt_id + return self._agent.last_prompt_id @property def logs(self): - return self.logger.logs + return self._agent.logger.logs @property def logger(self): - return self._logger + return self._agent.logger @logger.setter def logger(self, 
logger): - self._logger = logger + self._agent.logger = logger @property def config(self): - return self._config + return self._agent.context.config @property def cache(self): - return self._cache + return self._agent.context.cache @property def verbose(self): - return self._config.verbose + return self._agent.context.config.verbose @verbose.setter def verbose(self, verbose: bool): - self._config.verbose = verbose - self._logger.verbose = verbose + self._agent.context.config.verbose = verbose + self._agent.logger.verbose = verbose @property def save_logs(self): - return self._config.save_logs + return self._agent.context.config.save_logs @save_logs.setter def save_logs(self, save_logs: bool): - self._config.save_logs = save_logs - self._logger.save_logs = save_logs + self._agent.context.config.save_logs = save_logs + self._agent.logger.save_logs = save_logs @property def enforce_privacy(self): - return self._config.enforce_privacy + return self._agent.context.config.enforce_privacy @enforce_privacy.setter def enforce_privacy(self, enforce_privacy: bool): - self._config.enforce_privacy = enforce_privacy + self._agent.context.config.enforce_privacy = enforce_privacy @property def enable_cache(self): - return self._config.enable_cache + return self._agent.context.config.enable_cache @enable_cache.setter def enable_cache(self, enable_cache: bool): - self._config.enable_cache = enable_cache + self._agent.context.config.enable_cache = enable_cache if enable_cache: if self.cache is None: self._cache = Cache() @@ -607,102 +151,62 @@ def enable_cache(self, enable_cache: bool): @property def use_error_correction_framework(self): - return self._config.use_error_correction_framework + return self._agent.context.config.use_error_correction_framework @use_error_correction_framework.setter def use_error_correction_framework(self, use_error_correction_framework: bool): - self._config.use_error_correction_framework = use_error_correction_framework + 
self._agent.context.config.use_error_correction_framework = ( + use_error_correction_framework + ) @property def custom_prompts(self): - return self._config.custom_prompts + return self._agent.context.config.custom_prompts @custom_prompts.setter def custom_prompts(self, custom_prompts: dict): - self._config.custom_prompts = custom_prompts + self._agent.context.config.custom_prompts = custom_prompts @property def save_charts(self): - return self._config.save_charts + return self._agent.context.config.save_charts @save_charts.setter def save_charts(self, save_charts: bool): - self._config.save_charts = save_charts + self._agent.context.config.save_charts = save_charts @property def save_charts_path(self): - return self._config.save_charts_path + return self._agent.context.config.save_charts_path @save_charts_path.setter def save_charts_path(self, save_charts_path: str): - self._config.save_charts_path = save_charts_path - - @property - def custom_whitelisted_dependencies(self): - return self._config.custom_whitelisted_dependencies - - @custom_whitelisted_dependencies.setter - def custom_whitelisted_dependencies( - self, custom_whitelisted_dependencies: List[str] - ): - self._config.custom_whitelisted_dependencies = custom_whitelisted_dependencies - - @property - def max_retries(self): - return self._config.max_retries - - @max_retries.setter - def max_retries(self, max_retries: int): - self._config.max_retries = max_retries - - @property - def llm(self): - return self._llm - - @llm.setter - def llm(self, llm: LLM): - self._load_llm(llm) + self._agent.context.config.save_charts_path = save_charts_path @property def last_code_generated(self): - return self._last_code_generated - - @last_code_generated.setter - def last_code_generated(self, last_code_generated: str): - self._last_code_generated = last_code_generated + return self._agent.last_code_generated @property def last_code_executed(self): - return self._code_manager.last_code_executed + return 
self._agent.last_code_executed @property def last_result(self): - return self._last_result - - @last_result.setter - def last_result(self, last_result: str): - self._last_result = last_result + return self._agent.last_result @property def last_error(self): - return self._last_error - - @last_error.setter - def last_error(self, last_error: str): - self._last_error = last_error + return self._agent.last_error @property def dfs(self): - return self._dfs + return self._agent.context.dfs @property def memory(self): - return self._memory - - @property - def instance(self): - return self._instance + return self._agent.context.memory @property def last_query_log_id(self): - return self._query_exec_tracker.last_log_id + return self._agent.last_query_log_id diff --git a/pandasai/vectorstores/__init__.py b/pandasai/vectorstores/__init__.py new file mode 100644 index 000000000..7b9680faa --- /dev/null +++ b/pandasai/vectorstores/__init__.py @@ -0,0 +1,8 @@ +""" +Vector stores to store data for training purpose +""" + +from .bamboo_vectorstore import BambooVectorStore +from .vectorstore import VectorStore + +__all__ = ["VectorStore", "BambooVectorStore"] diff --git a/pandasai/vectorstores/bamboo_vectorstore.py b/pandasai/vectorstores/bamboo_vectorstore.py new file mode 100644 index 000000000..2dc47c59c --- /dev/null +++ b/pandasai/vectorstores/bamboo_vectorstore.py @@ -0,0 +1,74 @@ +from typing import Iterable, List, Optional, Union + +from pandasai.helpers.logger import Logger +from pandasai.helpers.request import Session +from pandasai.vectorstores.vectorstore import VectorStore + + +class BambooVectorStore(VectorStore): + """ + Implementation of ChromeDB vector store + """ + + _logger: Logger + + def __init__( + self, + endpoint_url: Optional[str] = None, + api_key: Optional[str] = None, + logger: Optional[Logger] = None, + max_samples: int = 1, + ) -> None: + self._max_samples = max_samples + self._logger = logger or Logger() + self._session = Session(endpoint_url, api_key, 
logger) + + def add_question_answer(self, queries: Iterable[str], codes: Iterable[str]) -> bool: + """ + Add question and answer(code) to the training set + Args: + queries: string of question + codes: str + """ + self._session.post("/training-data", json={"query": queries, "code": codes}) + return True + + def add_docs(self, docs: Iterable[str]) -> bool: + """ + Add docs to the training set + Args: + docs: Iterable of strings to add to the vectorstore. + ids: Optional Iterable of ids associated with the texts. + metadatas: Optional list of metadatas associated with the texts. + kwargs: vectorstore specific parameters + + Returns: + List of ids from adding the texts into the vectorstore. + """ + self._session.post("/training-docs", json={"docs": docs}) + return True + + def get_relevant_qa_documents(self, question: str, k: int = None) -> List[dict]: + """ + Returns relevant question answers based on search + """ + k = k or self._max_samples + docs = self._session.get( + "/training-data/qa/relevant-qa", params={"query": question, "count": k} + ) + return docs["docs"] + + def get_relevant_docs_documents( + self, question: str, k: Union[int, None] = 3 + ) -> List[str]: + """ + Returns relevant question answers documents only + Args: + question (_type_): list of documents + """ + k = k or self._max_samples + + docs = self._session.get( + "/training-docs/docs/relevant-docs", params={"query": question, "count": k} + ) + return docs["docs"] diff --git a/pandasai/vectorstores/vectorstore.py b/pandasai/vectorstores/vectorstore.py new file mode 100644 index 000000000..d8d1a8b33 --- /dev/null +++ b/pandasai/vectorstores/vectorstore.py @@ -0,0 +1,179 @@ +from abc import ABC, abstractmethod +from typing import Iterable, List, Optional + + +class VectorStore(ABC): + """Interface for vector store.""" + + @abstractmethod + def add_question_answer( + self, + queries: Iterable[str], + codes: Iterable[str], + ids: Optional[Iterable[str]] = None, + metadatas: Optional[List[dict]] = 
None, + ) -> List[str]: + """ + Add question and answer(code) to the training set + Args: + query: string of question + code: str + ids: Optional Iterable of ids associated with the texts. + metadatas: Optional list of metadatas associated with the texts. + kwargs: vectorstore specific parameters + Returns: + List of ids from adding the texts into the vectorstore. + """ + raise NotImplementedError( + "add_question_answer method must be implemented by subclass." + ) + + @abstractmethod + def add_docs( + self, + docs: Iterable[str], + ids: Optional[Iterable[str]] = None, + metadatas: Optional[List[dict]] = None, + ) -> List[str]: + """ + Add docs to the training set + Args: + docs: Iterable of strings to add to the vectorstore. + ids: Optional Iterable of ids associated with the texts. + metadatas: Optional list of metadatas associated with the texts. + kwargs: vectorstore specific parameters + + Returns: + List of ids from adding the texts into the vectorstore. + """ + raise NotImplementedError("add_docs method must be implemented by subclass.") + + def update_question_answer( + self, + ids: Iterable[str], + queries: Iterable[str], + codes: Iterable[str], + metadatas: Optional[List[dict]] = None, + ) -> List[str]: + """ + Update question and answer(code) to the training set + Args: + ids: Iterable of ids associated with the texts. + queries: string of question + codes: str + metadatas: Optional list of metadatas associated with the texts. + kwargs: vectorstore specific parameters + Returns: + List of ids from updating the texts into the vectorstore. + """ + pass + + def update_docs( + self, + ids: Iterable[str], + docs: Iterable[str], + metadatas: Optional[List[dict]] = None, + ) -> List[str]: + """ + Update docs to the training set + Args: + ids: Iterable of ids associated with the texts. + docs: Iterable of strings to update to the vectorstore. + metadatas: Optional list of metadatas associated with the texts. 
+ kwargs: vectorstore specific parameters + + Returns: + List of ids from adding the texts into the vectorstore. + """ + pass + + def delete_question_and_answers( + self, ids: Optional[List[str]] = None + ) -> Optional[bool]: + """ + Delete by vector ID or other criteria. + Args: + ids: List of ids to delete + + Returns: + Optional[bool]: True if deletion is successful, + False otherwise + """ + raise NotImplementedError( + "delete_question_and_answers method must be implemented by subclass." + ) + + def delete_docs(self, ids: Optional[List[str]] = None) -> Optional[bool]: + """ + Delete by vector ID or other criteria. + Args: + ids: List of ids to delete + + Returns: + Optional[bool]: True if deletion is successful, + False otherwise + """ + raise NotImplementedError("delete_docs method must be implemented by subclass.") + + def delete_collection(self, collection_name: str) -> Optional[bool]: + """ + Delete the collection + Args: + collection_name (str): name of the collection + + Returns: + Optional[bool]: _description_ + """ + + def get_relevant_question_answers(self, question: str, k: int = 1) -> List[dict]: + """ + Returns relevant question answers based on search + """ + raise NotImplementedError( + "get_relevant_question_answers method must be implemented by subclass." + ) + + def get_relevant_docs(self, question: str, k: int = 1) -> List[dict]: + """ + Returns relevant documents based search + """ + raise NotImplementedError( + "get_relevant_docs method must be implemented by subclass." 
+ ) + + def get_relevant_question_answers_by_id(self, ids: Iterable[str]) -> List[dict]: + """ + Returns relevant question answers based on ids + """ + pass + + def get_relevant_docs_by_id(self, ids: Iterable[str]) -> List[dict]: + """ + Returns relevant documents based on ids + """ + pass + + @abstractmethod + def get_relevant_qa_documents(self, question: str, k: int = 1) -> List[str]: + """ + Returns relevant question answers documents only + Args: + question (_type_): list of documents + """ + raise NotImplementedError( + "get_relevant_qa_documents method must be implemented by subclass." + ) + + @abstractmethod + def get_relevant_docs_documents(self, question: str, k: int = 1) -> List[str]: + """ + Returns relevant question answers documents only + Args: + question (_type_): list of documents + """ + raise NotImplementedError( + "get_relevant_docs_documents method must be implemented by subclass." + ) + + def _format_qa(self, query: str, code: str) -> str: + return f"Q: {query}\n A: {code}" diff --git a/poetry.lock b/poetry.lock index f19ba902e..498683b05 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,10 +1,9 @@ -# This file is automatically @generated by Poetry 1.4.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.0 and should not be changed by hand. 
[[package]] name = "aiohttp" version = "3.8.6" description = "Async http client/server framework (asyncio)" -category = "main" optional = true python-versions = ">=3.6" files = [ @@ -113,7 +112,6 @@ speedups = ["Brotli", "aiodns", "cchardet"] name = "aiohttp-cors" version = "0.7.0" description = "CORS support for aiohttp" -category = "main" optional = true python-versions = "*" files = [ @@ -128,7 +126,6 @@ aiohttp = ">=1.1" name = "aiosignal" version = "1.3.1" description = "aiosignal: a list of registered asynchronous callbacks" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -143,7 +140,6 @@ frozenlist = ">=1.1.0" name = "alembic" version = "1.12.1" description = "A database migration tool for SQLAlchemy." -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -163,7 +159,6 @@ tz = ["python-dateutil"] name = "altair" version = "5.1.2" description = "Vega-Altair: A declarative statistical visualization library for Python." -category = "main" optional = true python-versions = ">=3.8" files = [ @@ -188,7 +183,6 @@ doc = ["docutils", "geopandas", "jinja2", "myst-parser", "numpydoc", "pillow (>= name = "annotated-types" version = "0.6.0" description = "Reusable constraint types to use with typing.Annotated" -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -200,7 +194,6 @@ files = [ name = "ansicon" version = "1.89.0" description = "Python wrapper for loading Jason Hood's ANSICON" -category = "main" optional = true python-versions = "*" files = [ @@ -212,7 +205,6 @@ files = [ name = "anyio" version = "3.7.1" description = "High level compatibility layer for multiple asynchronous event loop implementations" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -234,7 +226,6 @@ trio = ["trio (<0.22)"] name = "appdirs" version = "1.4.4" description = "A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." 
-category = "main" optional = true python-versions = "*" files = [ @@ -246,7 +237,6 @@ files = [ name = "appnope" version = "0.1.3" description = "Disable App Nap on macOS >= 10.9" -category = "main" optional = false python-versions = "*" files = [ @@ -254,11 +244,27 @@ files = [ {file = "appnope-0.1.3.tar.gz", hash = "sha256:02bd91c4de869fbb1e1c50aafc4098827a7a54ab2f39d9dcba6c9547ed920e24"}, ] +[[package]] +name = "asgiref" +version = "3.7.2" +description = "ASGI specs, helper code, and adapters" +optional = true +python-versions = ">=3.7" +files = [ + {file = "asgiref-3.7.2-py3-none-any.whl", hash = "sha256:89b2ef2247e3b562a16eef663bc0e2e703ec6468e2fa8a5cd61cd449786d4f6e"}, + {file = "asgiref-3.7.2.tar.gz", hash = "sha256:9e0ce3aa93a819ba5b45120216b23878cf6e8525eb3848653452b4192b92afed"}, +] + +[package.dependencies] +typing-extensions = {version = ">=4", markers = "python_version < \"3.11\""} + +[package.extras] +tests = ["mypy (>=0.800)", "pytest", "pytest-asyncio"] + [[package]] name = "asn1crypto" version = "1.5.1" description = "Fast ASN.1 parser and serializer with definitions for private keys, public keys, certificates, CRL, OCSP, CMS, PKCS#3, PKCS#7, PKCS#8, PKCS#12, PKCS#5, X.509 and TSP" -category = "main" optional = true python-versions = "*" files = [ @@ -270,7 +276,6 @@ files = [ name = "astor" version = "0.8.1" description = "Read/rewrite/write Python ASTs" -category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7" files = [ @@ -282,7 +287,6 @@ files = [ name = "asttokens" version = "2.4.1" description = "Annotate AST trees with source code positions" -category = "main" optional = false python-versions = "*" files = [ @@ -301,7 +305,6 @@ test = ["astroid (>=1,<2)", "astroid (>=2,<4)", "pytest"] name = "async-timeout" version = "4.0.3" description = "Timeout context manager for asyncio programs" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -313,7 +316,6 @@ files = [ name = "attrs" 
version = "23.1.0" description = "Classes Without Boilerplate" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -328,11 +330,61 @@ docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib- tests = ["attrs[tests-no-zope]", "zope-interface"] tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +[[package]] +name = "backoff" +version = "2.2.1" +description = "Function decoration for backoff and retry" +optional = true +python-versions = ">=3.7,<4.0" +files = [ + {file = "backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8"}, + {file = "backoff-2.2.1.tar.gz", hash = "sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba"}, +] + +[[package]] +name = "bcrypt" +version = "4.1.2" +description = "Modern password hashing for your software and your servers" +optional = true +python-versions = ">=3.7" +files = [ + {file = "bcrypt-4.1.2-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:ac621c093edb28200728a9cca214d7e838529e557027ef0581685909acd28b5e"}, + {file = "bcrypt-4.1.2-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ea505c97a5c465ab8c3ba75c0805a102ce526695cd6818c6de3b1a38f6f60da1"}, + {file = "bcrypt-4.1.2-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:57fa9442758da926ed33a91644649d3e340a71e2d0a5a8de064fb621fd5a3326"}, + {file = "bcrypt-4.1.2-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:eb3bd3321517916696233b5e0c67fd7d6281f0ef48e66812db35fc963a422a1c"}, + {file = "bcrypt-4.1.2-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:6cad43d8c63f34b26aef462b6f5e44fdcf9860b723d2453b5d391258c4c8e966"}, + {file = "bcrypt-4.1.2-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:44290ccc827d3a24604f2c8bcd00d0da349e336e6503656cb8192133e27335e2"}, + {file = 
"bcrypt-4.1.2-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:732b3920a08eacf12f93e6b04ea276c489f1c8fb49344f564cca2adb663b3e4c"}, + {file = "bcrypt-4.1.2-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:1c28973decf4e0e69cee78c68e30a523be441972c826703bb93099868a8ff5b5"}, + {file = "bcrypt-4.1.2-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:b8df79979c5bae07f1db22dcc49cc5bccf08a0380ca5c6f391cbb5790355c0b0"}, + {file = "bcrypt-4.1.2-cp37-abi3-win32.whl", hash = "sha256:fbe188b878313d01b7718390f31528be4010fed1faa798c5a1d0469c9c48c369"}, + {file = "bcrypt-4.1.2-cp37-abi3-win_amd64.whl", hash = "sha256:9800ae5bd5077b13725e2e3934aa3c9c37e49d3ea3d06318010aa40f54c63551"}, + {file = "bcrypt-4.1.2-cp39-abi3-macosx_10_12_universal2.whl", hash = "sha256:71b8be82bc46cedd61a9f4ccb6c1a493211d031415a34adde3669ee1b0afbb63"}, + {file = "bcrypt-4.1.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:68e3c6642077b0c8092580c819c1684161262b2e30c4f45deb000c38947bf483"}, + {file = "bcrypt-4.1.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:387e7e1af9a4dd636b9505a465032f2f5cb8e61ba1120e79a0e1cd0b512f3dfc"}, + {file = "bcrypt-4.1.2-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:f70d9c61f9c4ca7d57f3bfe88a5ccf62546ffbadf3681bb1e268d9d2e41c91a7"}, + {file = "bcrypt-4.1.2-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:2a298db2a8ab20056120b45e86c00a0a5eb50ec4075b6142db35f593b97cb3fb"}, + {file = "bcrypt-4.1.2-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:ba55e40de38a24e2d78d34c2d36d6e864f93e0d79d0b6ce915e4335aa81d01b1"}, + {file = "bcrypt-4.1.2-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:3566a88234e8de2ccae31968127b0ecccbb4cddb629da744165db72b58d88ca4"}, + {file = "bcrypt-4.1.2-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:b90e216dc36864ae7132cb151ffe95155a37a14e0de3a8f64b49655dd959ff9c"}, + {file = "bcrypt-4.1.2-cp39-abi3-musllinux_1_2_x86_64.whl", hash = 
"sha256:69057b9fc5093ea1ab00dd24ede891f3e5e65bee040395fb1e66ee196f9c9b4a"}, + {file = "bcrypt-4.1.2-cp39-abi3-win32.whl", hash = "sha256:02d9ef8915f72dd6daaef40e0baeef8a017ce624369f09754baf32bb32dba25f"}, + {file = "bcrypt-4.1.2-cp39-abi3-win_amd64.whl", hash = "sha256:be3ab1071662f6065899fe08428e45c16aa36e28bc42921c4901a191fda6ee42"}, + {file = "bcrypt-4.1.2-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:d75fc8cd0ba23f97bae88a6ec04e9e5351ff3c6ad06f38fe32ba50cbd0d11946"}, + {file = "bcrypt-4.1.2-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:a97e07e83e3262599434816f631cc4c7ca2aa8e9c072c1b1a7fec2ae809a1d2d"}, + {file = "bcrypt-4.1.2-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:e51c42750b7585cee7892c2614be0d14107fad9581d1738d954a262556dd1aab"}, + {file = "bcrypt-4.1.2-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:ba4e4cc26610581a6329b3937e02d319f5ad4b85b074846bf4fef8a8cf51e7bb"}, + {file = "bcrypt-4.1.2.tar.gz", hash = "sha256:33313a1200a3ae90b75587ceac502b048b840fc69e7f7a0905b5f87fac7a1258"}, +] + +[package.extras] +tests = ["pytest (>=3.2.1,!=3.3.0)"] +typecheck = ["mypy"] + [[package]] name = "beautifulsoup4" version = "4.12.2" description = "Screen-scraping library" -category = "main" optional = true python-versions = ">=3.6.0" files = [ @@ -351,7 +403,6 @@ lxml = ["lxml"] name = "blessed" version = "1.20.0" description = "Easy, practical library for making terminal apps, by providing an elegant, well-documented interface to Colors, Keyboard input, and screen Positioning capabilities." 
-category = "main" optional = true python-versions = ">=2.7" files = [ @@ -368,7 +419,6 @@ wcwidth = ">=0.1.4" name = "blinker" version = "1.6.3" description = "Fast, simple object-to-object and broadcast signaling" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -380,7 +430,6 @@ files = [ name = "brewer2mpl" version = "1.4.1" description = "Connect colorbrewer2.org color maps to Python and matplotlib" -category = "main" optional = true python-versions = "*" files = [ @@ -389,11 +438,34 @@ files = [ {file = "brewer2mpl-1.4.1.zip", hash = "sha256:7f013109a2351b91d507c1e79c4fcfb001bdd754f70767baa635f1e80ac16d6d"}, ] +[[package]] +name = "build" +version = "1.0.3" +description = "A simple, correct Python build frontend" +optional = true +python-versions = ">= 3.7" +files = [ + {file = "build-1.0.3-py3-none-any.whl", hash = "sha256:589bf99a67df7c9cf07ec0ac0e5e2ea5d4b37ac63301c4986d1acb126aa83f8f"}, + {file = "build-1.0.3.tar.gz", hash = "sha256:538aab1b64f9828977f84bc63ae570b060a8ed1be419e7870b8b4fc5e6ea553b"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "os_name == \"nt\""} +importlib-metadata = {version = ">=4.6", markers = "python_version < \"3.10\""} +packaging = ">=19.0" +pyproject_hooks = "*" +tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} + +[package.extras] +docs = ["furo (>=2023.08.17)", "sphinx (>=7.0,<8.0)", "sphinx-argparse-cli (>=1.5)", "sphinx-autodoc-typehints (>=1.10)", "sphinx-issues (>=3.0.0)"] +test = ["filelock (>=3)", "pytest (>=6.2.4)", "pytest-cov (>=2.12)", "pytest-mock (>=2)", "pytest-rerunfailures (>=9.1)", "pytest-xdist (>=1.34)", "setuptools (>=42.0.0)", "setuptools (>=56.0.0)", "setuptools (>=56.0.0)", "setuptools (>=67.8.0)", "wheel (>=0.36.0)"] +typing = ["importlib-metadata (>=5.1)", "mypy (>=1.5.0,<1.6.0)", "tomli", "typing-extensions (>=3.7.4.3)"] +virtualenv = ["virtualenv (>=20.0.35)"] + [[package]] name = "cachetools" version = "5.3.2" description = "Extensible 
memoizing collections and decorators" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -405,7 +477,6 @@ files = [ name = "certifi" version = "2023.7.22" description = "Python package for providing Mozilla's CA Bundle." -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -417,7 +488,6 @@ files = [ name = "cffi" version = "1.16.0" description = "Foreign Function Interface for Python calling C code." -category = "main" optional = true python-versions = ">=3.8" files = [ @@ -482,7 +552,6 @@ pycparser = "*" name = "cfgv" version = "3.4.0" description = "Validate configuration and produce human readable error messages." -category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -494,7 +563,6 @@ files = [ name = "charset-normalizer" version = "3.3.2" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." -category = "main" optional = false python-versions = ">=3.7.0" files = [ @@ -590,11 +658,87 @@ files = [ {file = "charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"}, ] +[[package]] +name = "chroma-hnswlib" +version = "0.7.3" +description = "Chromas fork of hnswlib" +optional = true +python-versions = "*" +files = [ + {file = "chroma-hnswlib-0.7.3.tar.gz", hash = "sha256:b6137bedde49fffda6af93b0297fe00429fc61e5a072b1ed9377f909ed95a932"}, + {file = "chroma_hnswlib-0.7.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:59d6a7c6f863c67aeb23e79a64001d537060b6995c3eca9a06e349ff7b0998ca"}, + {file = "chroma_hnswlib-0.7.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d71a3f4f232f537b6152947006bd32bc1629a8686df22fd97777b70f416c127a"}, + {file = "chroma_hnswlib-0.7.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c92dc1ebe062188e53970ba13f6b07e0ae32e64c9770eb7f7ffa83f149d4210"}, + {file = 
"chroma_hnswlib-0.7.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:49da700a6656fed8753f68d44b8cc8ae46efc99fc8a22a6d970dc1697f49b403"}, + {file = "chroma_hnswlib-0.7.3-cp310-cp310-win_amd64.whl", hash = "sha256:108bc4c293d819b56476d8f7865803cb03afd6ca128a2a04d678fffc139af029"}, + {file = "chroma_hnswlib-0.7.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:11e7ca93fb8192214ac2b9c0943641ac0daf8f9d4591bb7b73be808a83835667"}, + {file = "chroma_hnswlib-0.7.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6f552e4d23edc06cdeb553cdc757d2fe190cdeb10d43093d6a3319f8d4bf1c6b"}, + {file = "chroma_hnswlib-0.7.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f96f4d5699e486eb1fb95849fe35ab79ab0901265805be7e60f4eaa83ce263ec"}, + {file = "chroma_hnswlib-0.7.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:368e57fe9ebae05ee5844840fa588028a023d1182b0cfdb1d13f607c9ea05756"}, + {file = "chroma_hnswlib-0.7.3-cp311-cp311-win_amd64.whl", hash = "sha256:b7dca27b8896b494456db0fd705b689ac6b73af78e186eb6a42fea2de4f71c6f"}, + {file = "chroma_hnswlib-0.7.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:70f897dc6218afa1d99f43a9ad5eb82f392df31f57ff514ccf4eeadecd62f544"}, + {file = "chroma_hnswlib-0.7.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5aef10b4952708f5a1381c124a29aead0c356f8d7d6e0b520b778aaa62a356f4"}, + {file = "chroma_hnswlib-0.7.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ee2d8d1529fca3898d512079144ec3e28a81d9c17e15e0ea4665697a7923253"}, + {file = "chroma_hnswlib-0.7.3-cp37-cp37m-win_amd64.whl", hash = "sha256:a4021a70e898783cd6f26e00008b494c6249a7babe8774e90ce4766dd288c8ba"}, + {file = "chroma_hnswlib-0.7.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:a8f61fa1d417fda848e3ba06c07671f14806a2585272b175ba47501b066fe6b1"}, + {file = "chroma_hnswlib-0.7.3-cp38-cp38-macosx_11_0_arm64.whl", hash = 
"sha256:d7563be58bc98e8f0866907368e22ae218d6060601b79c42f59af4eccbbd2e0a"}, + {file = "chroma_hnswlib-0.7.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:51b8d411486ee70d7b66ec08cc8b9b6620116b650df9c19076d2d8b6ce2ae914"}, + {file = "chroma_hnswlib-0.7.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d706782b628e4f43f1b8a81e9120ac486837fbd9bcb8ced70fe0d9b95c72d77"}, + {file = "chroma_hnswlib-0.7.3-cp38-cp38-win_amd64.whl", hash = "sha256:54f053dedc0e3ba657f05fec6e73dd541bc5db5b09aa8bc146466ffb734bdc86"}, + {file = "chroma_hnswlib-0.7.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:e607c5a71c610a73167a517062d302c0827ccdd6e259af6e4869a5c1306ffb5d"}, + {file = "chroma_hnswlib-0.7.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c2358a795870156af6761890f9eb5ca8cade57eb10c5f046fe94dae1faa04b9e"}, + {file = "chroma_hnswlib-0.7.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7cea425df2e6b8a5e201fff0d922a1cc1d165b3cfe762b1408075723c8892218"}, + {file = "chroma_hnswlib-0.7.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:454df3dd3e97aa784fba7cf888ad191e0087eef0fd8c70daf28b753b3b591170"}, + {file = "chroma_hnswlib-0.7.3-cp39-cp39-win_amd64.whl", hash = "sha256:df587d15007ca701c6de0ee7d5585dd5e976b7edd2b30ac72bc376b3c3f85882"}, +] + +[package.dependencies] +numpy = "*" + +[[package]] +name = "chromadb" +version = "0.4.22" +description = "Chroma." 
+optional = true +python-versions = ">=3.8" +files = [ + {file = "chromadb-0.4.22-py3-none-any.whl", hash = "sha256:ad210b27b4cda2f09d15adc9c83c81bfa66b69f39648a27b637306e40de0680d"}, + {file = "chromadb-0.4.22.tar.gz", hash = "sha256:c793149e1c2bbbb52d77602c6c0594c5752f04cd9be12619250ddad2082af27a"}, +] + +[package.dependencies] +bcrypt = ">=4.0.1" +build = ">=1.0.3" +chroma-hnswlib = "0.7.3" +fastapi = ">=0.95.2" +grpcio = ">=1.58.0" +importlib-resources = "*" +kubernetes = ">=28.1.0" +mmh3 = ">=4.0.1" +numpy = ">=1.22.5" +onnxruntime = ">=1.14.1" +opentelemetry-api = ">=1.2.0" +opentelemetry-exporter-otlp-proto-grpc = ">=1.2.0" +opentelemetry-instrumentation-fastapi = ">=0.41b0" +opentelemetry-sdk = ">=1.2.0" +overrides = ">=7.3.1" +posthog = ">=2.4.0" +pulsar-client = ">=3.1.0" +pydantic = ">=1.9" +pypika = ">=0.48.9" +PyYAML = ">=6.0.0" +requests = ">=2.28" +tenacity = ">=8.2.3" +tokenizers = ">=0.13.2" +tqdm = ">=4.65.0" +typer = ">=0.9.0" +typing-extensions = ">=4.5.0" +uvicorn = {version = ">=0.18.3", extras = ["standard"]} + [[package]] name = "click" version = "8.1.7" description = "Composable command line interface toolkit" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -609,7 +753,6 @@ colorama = {version = "*", markers = "platform_system == \"Windows\""} name = "codespell" version = "2.2.6" description = "Codespell" -category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -627,7 +770,6 @@ types = ["chardet (>=5.1.0)", "mypy", "pytest", "pytest-cov", "pytest-dependency name = "colorama" version = "0.4.6" description = "Cross-platform colored terminal text." 
-category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" files = [ @@ -635,11 +777,27 @@ files = [ {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] +[[package]] +name = "coloredlogs" +version = "15.0.1" +description = "Colored terminal output for Python's logging module" +optional = true +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +files = [ + {file = "coloredlogs-15.0.1-py2.py3-none-any.whl", hash = "sha256:612ee75c546f53e92e70049c9dbfcc18c935a2b9a53b66085ce9ef6a6e5c0934"}, + {file = "coloredlogs-15.0.1.tar.gz", hash = "sha256:7c991aa71a4577af2f82600d8f8f3a89f936baeaf9b50a9c197da014e5bf16b0"}, +] + +[package.dependencies] +humanfriendly = ">=9.1" + +[package.extras] +cron = ["capturer (>=2.4)"] + [[package]] name = "colorful" version = "0.5.6" description = "Terminal string styling done right, in Python." -category = "main" optional = true python-versions = "*" files = [ @@ -652,140 +810,71 @@ colorama = {version = "*", markers = "platform_system == \"Windows\""} [[package]] name = "contourpy" -version = "1.1.0" +version = "1.2.0" description = "Python library for calculating contours of 2D quadrilateral grids" -category = "main" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "contourpy-1.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:89f06eff3ce2f4b3eb24c1055a26981bffe4e7264acd86f15b97e40530b794bc"}, - {file = "contourpy-1.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:dffcc2ddec1782dd2f2ce1ef16f070861af4fb78c69862ce0aab801495dda6a3"}, - {file = "contourpy-1.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25ae46595e22f93592d39a7eac3d638cda552c3e1160255258b695f7b58e5655"}, - {file = "contourpy-1.1.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:17cfaf5ec9862bc93af1ec1f302457371c34e688fbd381f4035a06cd47324f48"}, - {file = "contourpy-1.1.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:18a64814ae7bce73925131381603fff0116e2df25230dfc80d6d690aa6e20b37"}, - {file = "contourpy-1.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90c81f22b4f572f8a2110b0b741bb64e5a6427e0a198b2cdc1fbaf85f352a3aa"}, - {file = "contourpy-1.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:53cc3a40635abedbec7f1bde60f8c189c49e84ac180c665f2cd7c162cc454baa"}, - {file = "contourpy-1.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:1f795597073b09d631782e7245016a4323cf1cf0b4e06eef7ea6627e06a37ff2"}, - {file = "contourpy-1.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0b7b04ed0961647691cfe5d82115dd072af7ce8846d31a5fac6c142dcce8b882"}, - {file = "contourpy-1.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:27bc79200c742f9746d7dd51a734ee326a292d77e7d94c8af6e08d1e6c15d545"}, - {file = "contourpy-1.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:052cc634bf903c604ef1a00a5aa093c54f81a2612faedaa43295809ffdde885e"}, - {file = "contourpy-1.1.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9382a1c0bc46230fb881c36229bfa23d8c303b889b788b939365578d762b5c18"}, - {file = "contourpy-1.1.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e5cec36c5090e75a9ac9dbd0ff4a8cf7cecd60f1b6dc23a374c7d980a1cd710e"}, - {file = "contourpy-1.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f0cbd657e9bde94cd0e33aa7df94fb73c1ab7799378d3b3f902eb8eb2e04a3a"}, - {file = "contourpy-1.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:181cbace49874f4358e2929aaf7ba84006acb76694102e88dd15af861996c16e"}, - {file = "contourpy-1.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:fb3b7d9e6243bfa1efb93ccfe64ec610d85cfe5aec2c25f97fbbd2e58b531256"}, - {file = 
"contourpy-1.1.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:bcb41692aa09aeb19c7c213411854402f29f6613845ad2453d30bf421fe68fed"}, - {file = "contourpy-1.1.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:5d123a5bc63cd34c27ff9c7ac1cd978909e9c71da12e05be0231c608048bb2ae"}, - {file = "contourpy-1.1.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:62013a2cf68abc80dadfd2307299bfa8f5aa0dcaec5b2954caeb5fa094171103"}, - {file = "contourpy-1.1.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0b6616375d7de55797d7a66ee7d087efe27f03d336c27cf1f32c02b8c1a5ac70"}, - {file = "contourpy-1.1.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:317267d915490d1e84577924bd61ba71bf8681a30e0d6c545f577363157e5e94"}, - {file = "contourpy-1.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d551f3a442655f3dcc1285723f9acd646ca5858834efeab4598d706206b09c9f"}, - {file = "contourpy-1.1.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:e7a117ce7df5a938fe035cad481b0189049e8d92433b4b33aa7fc609344aafa1"}, - {file = "contourpy-1.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:d4f26b25b4f86087e7d75e63212756c38546e70f2a92d2be44f80114826e1cd4"}, - {file = "contourpy-1.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bc00bb4225d57bff7ebb634646c0ee2a1298402ec10a5fe7af79df9a51c1bfd9"}, - {file = "contourpy-1.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:189ceb1525eb0655ab8487a9a9c41f42a73ba52d6789754788d1883fb06b2d8a"}, - {file = "contourpy-1.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9f2931ed4741f98f74b410b16e5213f71dcccee67518970c42f64153ea9313b9"}, - {file = "contourpy-1.1.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:30f511c05fab7f12e0b1b7730ebdc2ec8deedcfb505bc27eb570ff47c51a8f15"}, - {file = "contourpy-1.1.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:143dde50520a9f90e4a2703f367cf8ec96a73042b72e68fcd184e1279962eb6f"}, - {file = "contourpy-1.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e94bef2580e25b5fdb183bf98a2faa2adc5b638736b2c0a4da98691da641316a"}, - {file = "contourpy-1.1.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ed614aea8462735e7d70141374bd7650afd1c3f3cb0c2dbbcbe44e14331bf002"}, - {file = "contourpy-1.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:438ba416d02f82b692e371858143970ed2eb6337d9cdbbede0d8ad9f3d7dd17d"}, - {file = "contourpy-1.1.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:a698c6a7a432789e587168573a864a7ea374c6be8d4f31f9d87c001d5a843493"}, - {file = "contourpy-1.1.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:397b0ac8a12880412da3551a8cb5a187d3298a72802b45a3bd1805e204ad8439"}, - {file = "contourpy-1.1.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:a67259c2b493b00e5a4d0f7bfae51fb4b3371395e47d079a4446e9b0f4d70e76"}, - {file = "contourpy-1.1.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:2b836d22bd2c7bb2700348e4521b25e077255ebb6ab68e351ab5aa91ca27e027"}, - {file = "contourpy-1.1.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:084eaa568400cfaf7179b847ac871582199b1b44d5699198e9602ecbbb5f6104"}, - {file = "contourpy-1.1.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:911ff4fd53e26b019f898f32db0d4956c9d227d51338fb3b03ec72ff0084ee5f"}, - {file = "contourpy-1.1.0.tar.gz", hash = "sha256:e53046c3863828d21d531cc3b53786e6580eb1ba02477e8681009b6aa0870b21"}, -] - -[package.dependencies] -numpy = ">=1.16" - -[package.extras] -bokeh = ["bokeh", "selenium"] -docs = ["furo", "sphinx-copybutton"] -mypy = ["contourpy[bokeh,docs]", "docutils-stubs", "mypy (==1.2.0)", "types-Pillow"] -test = ["Pillow", "contourpy[test-no-images]", "matplotlib"] -test-no-images = ["pytest", "pytest-cov", "wurlitzer"] + {file = "contourpy-1.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = 
"sha256:0274c1cb63625972c0c007ab14dd9ba9e199c36ae1a231ce45d725cbcbfd10a8"}, + {file = "contourpy-1.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ab459a1cbbf18e8698399c595a01f6dcc5c138220ca3ea9e7e6126232d102bb4"}, + {file = "contourpy-1.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6fdd887f17c2f4572ce548461e4f96396681212d858cae7bd52ba3310bc6f00f"}, + {file = "contourpy-1.2.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5d16edfc3fc09968e09ddffada434b3bf989bf4911535e04eada58469873e28e"}, + {file = "contourpy-1.2.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1c203f617abc0dde5792beb586f827021069fb6d403d7f4d5c2b543d87edceb9"}, + {file = "contourpy-1.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b69303ceb2e4d4f146bf82fda78891ef7bcd80c41bf16bfca3d0d7eb545448aa"}, + {file = "contourpy-1.2.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:884c3f9d42d7218304bc74a8a7693d172685c84bd7ab2bab1ee567b769696df9"}, + {file = "contourpy-1.2.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4a1b1208102be6e851f20066bf0e7a96b7d48a07c9b0cfe6d0d4545c2f6cadab"}, + {file = "contourpy-1.2.0-cp310-cp310-win32.whl", hash = "sha256:34b9071c040d6fe45d9826cbbe3727d20d83f1b6110d219b83eb0e2a01d79488"}, + {file = "contourpy-1.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:bd2f1ae63998da104f16a8b788f685e55d65760cd1929518fd94cd682bf03e41"}, + {file = "contourpy-1.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:dd10c26b4eadae44783c45ad6655220426f971c61d9b239e6f7b16d5cdaaa727"}, + {file = "contourpy-1.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5c6b28956b7b232ae801406e529ad7b350d3f09a4fde958dfdf3c0520cdde0dd"}, + {file = "contourpy-1.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ebeac59e9e1eb4b84940d076d9f9a6cec0064e241818bcb6e32124cc5c3e377a"}, + {file = 
"contourpy-1.2.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:139d8d2e1c1dd52d78682f505e980f592ba53c9f73bd6be102233e358b401063"}, + {file = "contourpy-1.2.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1e9dc350fb4c58adc64df3e0703ab076f60aac06e67d48b3848c23647ae4310e"}, + {file = "contourpy-1.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:18fc2b4ed8e4a8fe849d18dce4bd3c7ea637758c6343a1f2bae1e9bd4c9f4686"}, + {file = "contourpy-1.2.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:16a7380e943a6d52472096cb7ad5264ecee36ed60888e2a3d3814991a0107286"}, + {file = "contourpy-1.2.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:8d8faf05be5ec8e02a4d86f616fc2a0322ff4a4ce26c0f09d9f7fb5330a35c95"}, + {file = "contourpy-1.2.0-cp311-cp311-win32.whl", hash = "sha256:67b7f17679fa62ec82b7e3e611c43a016b887bd64fb933b3ae8638583006c6d6"}, + {file = "contourpy-1.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:99ad97258985328b4f207a5e777c1b44a83bfe7cf1f87b99f9c11d4ee477c4de"}, + {file = "contourpy-1.2.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:575bcaf957a25d1194903a10bc9f316c136c19f24e0985a2b9b5608bdf5dbfe0"}, + {file = "contourpy-1.2.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9e6c93b5b2dbcedad20a2f18ec22cae47da0d705d454308063421a3b290d9ea4"}, + {file = "contourpy-1.2.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:464b423bc2a009088f19bdf1f232299e8b6917963e2b7e1d277da5041f33a779"}, + {file = "contourpy-1.2.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:68ce4788b7d93e47f84edd3f1f95acdcd142ae60bc0e5493bfd120683d2d4316"}, + {file = "contourpy-1.2.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3d7d1f8871998cdff5d2ff6a087e5e1780139abe2838e85b0b46b7ae6cc25399"}, + {file = "contourpy-1.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:6e739530c662a8d6d42c37c2ed52a6f0932c2d4a3e8c1f90692ad0ce1274abe0"}, + {file = "contourpy-1.2.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:247b9d16535acaa766d03037d8e8fb20866d054d3c7fbf6fd1f993f11fc60ca0"}, + {file = "contourpy-1.2.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:461e3ae84cd90b30f8d533f07d87c00379644205b1d33a5ea03381edc4b69431"}, + {file = "contourpy-1.2.0-cp312-cp312-win32.whl", hash = "sha256:1c2559d6cffc94890b0529ea7eeecc20d6fadc1539273aa27faf503eb4656d8f"}, + {file = "contourpy-1.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:491b1917afdd8638a05b611a56d46587d5a632cabead889a5440f7c638bc6ed9"}, + {file = "contourpy-1.2.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5fd1810973a375ca0e097dee059c407913ba35723b111df75671a1976efa04bc"}, + {file = "contourpy-1.2.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:999c71939aad2780f003979b25ac5b8f2df651dac7b38fb8ce6c46ba5abe6ae9"}, + {file = "contourpy-1.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b7caf9b241464c404613512d5594a6e2ff0cc9cb5615c9475cc1d9b514218ae8"}, + {file = "contourpy-1.2.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:266270c6f6608340f6c9836a0fb9b367be61dde0c9a9a18d5ece97774105ff3e"}, + {file = "contourpy-1.2.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dbd50d0a0539ae2e96e537553aff6d02c10ed165ef40c65b0e27e744a0f10af8"}, + {file = "contourpy-1.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:11f8d2554e52f459918f7b8e6aa20ec2a3bce35ce95c1f0ef4ba36fbda306df5"}, + {file = "contourpy-1.2.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ce96dd400486e80ac7d195b2d800b03e3e6a787e2a522bfb83755938465a819e"}, + {file = "contourpy-1.2.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:6d3364b999c62f539cd403f8123ae426da946e142312a514162adb2addd8d808"}, + {file = "contourpy-1.2.0-cp39-cp39-win32.whl", hash = 
"sha256:1c88dfb9e0c77612febebb6ac69d44a8d81e3dc60f993215425b62c1161353f4"}, + {file = "contourpy-1.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:78e6ad33cf2e2e80c5dfaaa0beec3d61face0fb650557100ee36db808bfa6843"}, + {file = "contourpy-1.2.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:be16975d94c320432657ad2402f6760990cb640c161ae6da1363051805fa8108"}, + {file = "contourpy-1.2.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b95a225d4948b26a28c08307a60ac00fb8671b14f2047fc5476613252a129776"}, + {file = "contourpy-1.2.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:0d7e03c0f9a4f90dc18d4e77e9ef4ec7b7bbb437f7f675be8e530d65ae6ef956"}, + {file = "contourpy-1.2.0.tar.gz", hash = "sha256:171f311cb758de7da13fc53af221ae47a5877be5a0843a9fe150818c51ed276a"}, +] -[[package]] -name = "contourpy" -version = "1.1.1" -description = "Python library for calculating contours of 2D quadrilateral grids" -category = "main" -optional = false -python-versions = ">=3.8" -files = [ - {file = "contourpy-1.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:46e24f5412c948d81736509377e255f6040e94216bf1a9b5ea1eaa9d29f6ec1b"}, - {file = "contourpy-1.1.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0e48694d6a9c5a26ee85b10130c77a011a4fedf50a7279fa0bdaf44bafb4299d"}, - {file = "contourpy-1.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a66045af6cf00e19d02191ab578a50cb93b2028c3eefed999793698e9ea768ae"}, - {file = "contourpy-1.1.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4ebf42695f75ee1a952f98ce9775c873e4971732a87334b099dde90b6af6a916"}, - {file = "contourpy-1.1.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f6aec19457617ef468ff091669cca01fa7ea557b12b59a7908b9474bb9674cf0"}, - {file = "contourpy-1.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:462c59914dc6d81e0b11f37e560b8a7c2dbab6aca4f38be31519d442d6cde1a1"}, - 
{file = "contourpy-1.1.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6d0a8efc258659edc5299f9ef32d8d81de8b53b45d67bf4bfa3067f31366764d"}, - {file = "contourpy-1.1.1-cp310-cp310-win32.whl", hash = "sha256:d6ab42f223e58b7dac1bb0af32194a7b9311065583cc75ff59dcf301afd8a431"}, - {file = "contourpy-1.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:549174b0713d49871c6dee90a4b499d3f12f5e5f69641cd23c50a4542e2ca1eb"}, - {file = "contourpy-1.1.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:407d864db716a067cc696d61fa1ef6637fedf03606e8417fe2aeed20a061e6b2"}, - {file = "contourpy-1.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:dfe80c017973e6a4c367e037cb31601044dd55e6bfacd57370674867d15a899b"}, - {file = "contourpy-1.1.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e30aaf2b8a2bac57eb7e1650df1b3a4130e8d0c66fc2f861039d507a11760e1b"}, - {file = "contourpy-1.1.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3de23ca4f381c3770dee6d10ead6fff524d540c0f662e763ad1530bde5112532"}, - {file = "contourpy-1.1.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:566f0e41df06dfef2431defcfaa155f0acfa1ca4acbf8fd80895b1e7e2ada40e"}, - {file = "contourpy-1.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b04c2f0adaf255bf756cf08ebef1be132d3c7a06fe6f9877d55640c5e60c72c5"}, - {file = "contourpy-1.1.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d0c188ae66b772d9d61d43c6030500344c13e3f73a00d1dc241da896f379bb62"}, - {file = "contourpy-1.1.1-cp311-cp311-win32.whl", hash = "sha256:0683e1ae20dc038075d92e0e0148f09ffcefab120e57f6b4c9c0f477ec171f33"}, - {file = "contourpy-1.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:8636cd2fc5da0fb102a2504fa2c4bea3cbc149533b345d72cdf0e7a924decc45"}, - {file = "contourpy-1.1.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:560f1d68a33e89c62da5da4077ba98137a5e4d3a271b29f2f195d0fba2adcb6a"}, - {file = 
"contourpy-1.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:24216552104ae8f3b34120ef84825400b16eb6133af2e27a190fdc13529f023e"}, - {file = "contourpy-1.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:56de98a2fb23025882a18b60c7f0ea2d2d70bbbcfcf878f9067234b1c4818442"}, - {file = "contourpy-1.1.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:07d6f11dfaf80a84c97f1a5ba50d129d9303c5b4206f776e94037332e298dda8"}, - {file = "contourpy-1.1.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f1eaac5257a8f8a047248d60e8f9315c6cff58f7803971170d952555ef6344a7"}, - {file = "contourpy-1.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:19557fa407e70f20bfaba7d55b4d97b14f9480856c4fb65812e8a05fe1c6f9bf"}, - {file = "contourpy-1.1.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:081f3c0880712e40effc5f4c3b08feca6d064cb8cfbb372ca548105b86fd6c3d"}, - {file = "contourpy-1.1.1-cp312-cp312-win32.whl", hash = "sha256:059c3d2a94b930f4dafe8105bcdc1b21de99b30b51b5bce74c753686de858cb6"}, - {file = "contourpy-1.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:f44d78b61740e4e8c71db1cf1fd56d9050a4747681c59ec1094750a658ceb970"}, - {file = "contourpy-1.1.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:70e5a10f8093d228bb2b552beeb318b8928b8a94763ef03b858ef3612b29395d"}, - {file = "contourpy-1.1.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:8394e652925a18ef0091115e3cc191fef350ab6dc3cc417f06da66bf98071ae9"}, - {file = "contourpy-1.1.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5bd5680f844c3ff0008523a71949a3ff5e4953eb7701b28760805bc9bcff217"}, - {file = "contourpy-1.1.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:66544f853bfa85c0d07a68f6c648b2ec81dafd30f272565c37ab47a33b220684"}, - {file = "contourpy-1.1.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:e0c02b75acfea5cab07585d25069207e478d12309557f90a61b5a3b4f77f46ce"}, - {file = "contourpy-1.1.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:41339b24471c58dc1499e56783fedc1afa4bb018bcd035cfb0ee2ad2a7501ef8"}, - {file = "contourpy-1.1.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:f29fb0b3f1217dfe9362ec55440d0743fe868497359f2cf93293f4b2701b8251"}, - {file = "contourpy-1.1.1-cp38-cp38-win32.whl", hash = "sha256:f9dc7f933975367251c1b34da882c4f0e0b2e24bb35dc906d2f598a40b72bfc7"}, - {file = "contourpy-1.1.1-cp38-cp38-win_amd64.whl", hash = "sha256:498e53573e8b94b1caeb9e62d7c2d053c263ebb6aa259c81050766beb50ff8d9"}, - {file = "contourpy-1.1.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ba42e3810999a0ddd0439e6e5dbf6d034055cdc72b7c5c839f37a7c274cb4eba"}, - {file = "contourpy-1.1.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:6c06e4c6e234fcc65435223c7b2a90f286b7f1b2733058bdf1345d218cc59e34"}, - {file = "contourpy-1.1.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca6fab080484e419528e98624fb5c4282148b847e3602dc8dbe0cb0669469887"}, - {file = "contourpy-1.1.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:93df44ab351119d14cd1e6b52a5063d3336f0754b72736cc63db59307dabb718"}, - {file = "contourpy-1.1.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:eafbef886566dc1047d7b3d4b14db0d5b7deb99638d8e1be4e23a7c7ac59ff0f"}, - {file = "contourpy-1.1.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:efe0fab26d598e1ec07d72cf03eaeeba8e42b4ecf6b9ccb5a356fde60ff08b85"}, - {file = "contourpy-1.1.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:f08e469821a5e4751c97fcd34bcb586bc243c39c2e39321822060ba902eac49e"}, - {file = "contourpy-1.1.1-cp39-cp39-win32.whl", hash = "sha256:bfc8a5e9238232a45ebc5cb3bfee71f1167064c8d382cadd6076f0d51cff1da0"}, - {file = "contourpy-1.1.1-cp39-cp39-win_amd64.whl", hash = 
"sha256:c84fdf3da00c2827d634de4fcf17e3e067490c4aea82833625c4c8e6cdea0887"}, - {file = "contourpy-1.1.1-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:229a25f68046c5cf8067d6d6351c8b99e40da11b04d8416bf8d2b1d75922521e"}, - {file = "contourpy-1.1.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a10dab5ea1bd4401c9483450b5b0ba5416be799bbd50fc7a6cc5e2a15e03e8a3"}, - {file = "contourpy-1.1.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:4f9147051cb8fdb29a51dc2482d792b3b23e50f8f57e3720ca2e3d438b7adf23"}, - {file = "contourpy-1.1.1-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:a75cc163a5f4531a256f2c523bd80db509a49fc23721b36dd1ef2f60ff41c3cb"}, - {file = "contourpy-1.1.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b53d5769aa1f2d4ea407c65f2d1d08002952fac1d9e9d307aa2e1023554a163"}, - {file = "contourpy-1.1.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:11b836b7dbfb74e049c302bbf74b4b8f6cb9d0b6ca1bf86cfa8ba144aedadd9c"}, - {file = "contourpy-1.1.1.tar.gz", hash = "sha256:96ba37c2e24b7212a77da85004c38e7c4d155d3e72a45eeaf22c1f03f607e8ab"}, -] - -[package.dependencies] -numpy = {version = ">=1.16,<2.0", markers = "python_version <= \"3.11\""} +[package.dependencies] +numpy = ">=1.20,<2.0" [package.extras] bokeh = ["bokeh", "selenium"] docs = ["furo", "sphinx (>=7.2)", "sphinx-copybutton"] -mypy = ["contourpy[bokeh,docs]", "docutils-stubs", "mypy (==1.4.1)", "types-Pillow"] +mypy = ["contourpy[bokeh,docs]", "docutils-stubs", "mypy (==1.6.1)", "types-Pillow"] test = ["Pillow", "contourpy[test-no-images]", "matplotlib"] -test-no-images = ["pytest", "pytest-cov", "wurlitzer"] +test-no-images = ["pytest", "pytest-cov", "pytest-xdist", "wurlitzer"] [[package]] name = "coverage" version = "7.3.2" description = "Code coverage measurement for Python" -category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -850,7 +939,6 @@ toml = ["tomli"] name = "cryptography" version = 
"41.0.5" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -896,7 +984,6 @@ test-randomorder = ["pytest-randomly"] name = "cycler" version = "0.12.1" description = "Composable style cycles" -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -912,7 +999,6 @@ tests = ["pytest", "pytest-cov", "pytest-xdist"] name = "databricks-sql-connector" version = "2.9.3" description = "Databricks SQL Connector for Python" -category = "main" optional = true python-versions = ">=3.7.1,<4.0.0" files = [ @@ -943,7 +1029,6 @@ urllib3 = ">=1.0" name = "dataclasses-json" version = "0.5.9" description = "Easily serialize dataclasses to and from JSON" -category = "main" optional = true python-versions = ">=3.6" files = [ @@ -963,7 +1048,6 @@ dev = ["flake8", "hypothesis", "ipython", "mypy (>=0.710)", "portray", "pytest ( name = "decorator" version = "5.1.1" description = "Decorators for Humans" -category = "main" optional = false python-versions = ">=3.5" files = [ @@ -971,11 +1055,27 @@ files = [ {file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"}, ] +[[package]] +name = "deprecated" +version = "1.2.14" +description = "Python @deprecated decorator to deprecate old python classes, functions or methods." 
+optional = true +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "Deprecated-1.2.14-py2.py3-none-any.whl", hash = "sha256:6fac8b097794a90302bdbb17b9b815e732d3c4720583ff1b198499d78470466c"}, + {file = "Deprecated-1.2.14.tar.gz", hash = "sha256:e5323eb936458dccc2582dc6f9c322c852a775a27065ff2b0c4970b9d53d01b3"}, +] + +[package.dependencies] +wrapt = ">=1.10,<2" + +[package.extras] +dev = ["PyTest", "PyTest-Cov", "bump2version (<1)", "sphinx (<2)", "tox"] + [[package]] name = "distlib" version = "0.3.7" description = "Distribution utilities" -category = "main" optional = false python-versions = "*" files = [ @@ -987,7 +1087,6 @@ files = [ name = "distro" version = "1.8.0" description = "Distro - an OS platform information API" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -999,7 +1098,6 @@ files = [ name = "duckdb" version = "0.9.2" description = "DuckDB embedded database" -category = "main" optional = false python-versions = ">=3.7.0" files = [ @@ -1048,7 +1146,6 @@ files = [ name = "et-xmlfile" version = "1.1.0" description = "An implementation of lxml.xmlfile for the standard library" -category = "main" optional = true python-versions = ">=3.6" files = [ @@ -1060,7 +1157,6 @@ files = [ name = "exceptiongroup" version = "1.1.3" description = "Backport of PEP 654 (exception groups)" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1075,7 +1171,6 @@ test = ["pytest (>=6)"] name = "executing" version = "2.0.1" description = "Get the currently executing AST node of a frame, and other information" -category = "main" optional = false python-versions = ">=3.5" files = [ @@ -1090,7 +1185,6 @@ tests = ["asttokens (>=2.1.0)", "coverage", "coverage-enable-subprocess", "ipyth name = "faker" version = "19.12.1" description = "Faker is a Python package that generates fake data for you." 
-category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1101,11 +1195,29 @@ files = [ [package.dependencies] python-dateutil = ">=2.4" +[[package]] +name = "fastapi" +version = "0.109.0" +description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production" +optional = true +python-versions = ">=3.8" +files = [ + {file = "fastapi-0.109.0-py3-none-any.whl", hash = "sha256:8c77515984cd8e8cfeb58364f8cc7a28f0692088475e2614f7bf03275eba9093"}, + {file = "fastapi-0.109.0.tar.gz", hash = "sha256:b978095b9ee01a5cf49b19f4bc1ac9b8ca83aa076e770ef8fd9af09a2b88d191"}, +] + +[package.dependencies] +pydantic = ">=1.7.4,<1.8 || >1.8,<1.8.1 || >1.8.1,<2.0.0 || >2.0.0,<2.0.1 || >2.0.1,<2.1.0 || >2.1.0,<3.0.0" +starlette = ">=0.35.0,<0.36.0" +typing-extensions = ">=4.8.0" + +[package.extras] +all = ["email-validator (>=2.0.0)", "httpx (>=0.23.0)", "itsdangerous (>=1.1.0)", "jinja2 (>=2.11.2)", "orjson (>=3.2.1)", "pydantic-extra-types (>=2.0.0)", "pydantic-settings (>=2.0.0)", "python-multipart (>=0.0.5)", "pyyaml (>=5.3.1)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0)", "uvicorn[standard] (>=0.12.0)"] + [[package]] name = "filelock" version = "3.13.1" description = "A platform independent file lock." -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1122,7 +1234,6 @@ typing = ["typing-extensions (>=4.8)"] name = "flask" version = "3.0.2" description = "A simple framework for building complex web applications." 
-category = "main" optional = true python-versions = ">=3.8" files = [ @@ -1142,11 +1253,21 @@ Werkzeug = ">=3.0.0" async = ["asgiref (>=3.2)"] dotenv = ["python-dotenv"] +[[package]] +name = "flatbuffers" +version = "23.5.26" +description = "The FlatBuffers serialization format for Python" +optional = true +python-versions = "*" +files = [ + {file = "flatbuffers-23.5.26-py2.py3-none-any.whl", hash = "sha256:c0ff356da363087b915fde4b8b45bdda73432fc17cddb3c8157472eab1422ad1"}, + {file = "flatbuffers-23.5.26.tar.gz", hash = "sha256:9ea1144cac05ce5d86e2859f431c6cd5e66cd9c78c558317c7955fb8d4c78d89"}, +] + [[package]] name = "fonttools" version = "4.43.1" description = "Tools to manipulate font files" -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1212,7 +1333,6 @@ woff = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "zopfli (>=0.1.4)"] name = "frozendict" version = "2.3.8" description = "A simple immutable dictionary" -category = "main" optional = true python-versions = ">=3.6" files = [ @@ -1259,7 +1379,6 @@ files = [ name = "frozenlist" version = "1.4.0" description = "A list-like structure which implements collections.abc.MutableSequence" -category = "main" optional = true python-versions = ">=3.8" files = [ @@ -1330,7 +1449,6 @@ files = [ name = "fsspec" version = "2023.10.0" description = "File-system specification" -category = "main" optional = true python-versions = ">=3.8" files = [ @@ -1366,7 +1484,6 @@ tqdm = ["tqdm"] name = "future" version = "0.18.3" description = "Clean single-source support for Python 3 and 2" -category = "main" optional = true python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -1377,7 +1494,6 @@ files = [ name = "ggplot" version = "0.11.5" description = "ggplot for python" -category = "main" optional = true python-versions = "*" files = [ @@ -1400,7 +1516,6 @@ statsmodels = "*" name = "ghp-import" version = "2.1.0" description = "Copy your docs directly to the gh-pages branch." 
-category = "dev" optional = false python-versions = "*" files = [ @@ -1418,7 +1533,6 @@ dev = ["flake8", "markdown", "twine", "wheel"] name = "gitdb" version = "4.0.11" description = "Git Object Database" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -1433,7 +1547,6 @@ smmap = ">=3.0.1,<6" name = "gitpython" version = "3.1.40" description = "GitPython is a Python library used to interact with Git repositories" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -1451,7 +1564,6 @@ test = ["black", "coverage[toml]", "ddt (>=1.1.1,!=1.4.3)", "mock", "mypy", "pre name = "google-ai-generativelanguage" version = "0.4.0" description = "Google Ai Generativelanguage API client library" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -1460,7 +1572,7 @@ files = [ ] [package.dependencies] -google-api-core = {version = ">=1.34.0,<2.0.0 || >=2.11.0,<3.0.0dev", extras = ["grpc"]} +google-api-core = {version = ">=1.34.0,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} proto-plus = ">=1.22.3,<2.0.0dev" protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" @@ -1468,7 +1580,6 @@ protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4 name = "google-api-core" version = "2.12.0" description = "Google API client core library" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -1480,11 +1591,11 @@ files = [ google-auth = ">=2.14.1,<3.0.dev0" googleapis-common-protos = ">=1.56.2,<2.0.dev0" grpcio = [ - {version = ">=1.33.2,<2.0dev", optional = true, markers = "extra == \"grpc\""}, + {version = ">=1.33.2,<2.0dev", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""}, {version = ">=1.49.1,<2.0dev", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""}, ] grpcio-status = [ - {version = 
">=1.33.2,<2.0.dev0", optional = true, markers = "extra == \"grpc\""}, + {version = ">=1.33.2,<2.0.dev0", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""}, {version = ">=1.49.1,<2.0.dev0", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""}, ] protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0.dev0" @@ -1499,7 +1610,6 @@ grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"] name = "google-auth" version = "2.23.4" description = "Google Authentication Library" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -1521,18 +1631,17 @@ requests = ["requests (>=2.20.0,<3.0.0.dev0)"] [[package]] name = "google-cloud-aiplatform" -version = "1.38.0" +version = "1.38.1" description = "Vertex AI API client library" -category = "main" optional = true python-versions = ">=3.8" files = [ - {file = "google-cloud-aiplatform-1.38.0.tar.gz", hash = "sha256:dff91f79b64e279f0e61dfd63c4e067ba5fa75ef0f4614289bbdca70d086a9e2"}, - {file = "google_cloud_aiplatform-1.38.0-py2.py3-none-any.whl", hash = "sha256:7eec50d9a36d43e163f019a1ade9284d4580602a5108738a0ebff8940ea47ce0"}, + {file = "google-cloud-aiplatform-1.38.1.tar.gz", hash = "sha256:30439d914bb028443c0506cc0e6dd825cff5401aeb8233e13d8cfd77c3c87da1"}, + {file = "google_cloud_aiplatform-1.38.1-py2.py3-none-any.whl", hash = "sha256:5e1fcd1068dd2c4f0fc89aa616e34a8b9434eaa72ea6216f5036ef26f08bd448"}, ] [package.dependencies] -google-api-core = {version = ">=1.32.0,<2.0.0 || >=2.8.0,<3.0.0dev", extras = ["grpc"]} +google-api-core = {version = ">=1.32.0,<2.0.dev0 || >=2.8.dev0,<3.0.0dev", extras = ["grpc"]} google-cloud-bigquery = ">=1.15.0,<4.0.0dev" google-cloud-resource-manager = ">=1.3.3,<3.0.0dev" google-cloud-storage = ">=1.32.0,<3.0.0dev" @@ -1564,7 +1673,6 @@ xai = ["tensorflow (>=2.3.0,<3.0.0dev)"] name = "google-cloud-bigquery" 
version = "3.13.0" description = "Google BigQuery API client library" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -1573,7 +1681,7 @@ files = [ ] [package.dependencies] -google-api-core = {version = ">=1.31.5,<2.0.0 || >2.3.0,<3.0.0dev", extras = ["grpc"]} +google-api-core = {version = ">=1.31.5,<2.0.dev0 || >2.3.0,<3.0.0dev", extras = ["grpc"]} google-cloud-core = ">=1.6.0,<3.0.0dev" google-resumable-media = ">=0.6.0,<3.0dev" grpcio = [ @@ -1600,7 +1708,6 @@ tqdm = ["tqdm (>=4.7.4,<5.0.0dev)"] name = "google-cloud-core" version = "2.3.3" description = "Google Cloud API client core library" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -1609,7 +1716,7 @@ files = [ ] [package.dependencies] -google-api-core = ">=1.31.6,<2.0.0 || >2.3.0,<3.0.0dev" +google-api-core = ">=1.31.6,<2.0.dev0 || >2.3.0,<3.0.0dev" google-auth = ">=1.25.0,<3.0dev" [package.extras] @@ -1619,7 +1726,6 @@ grpc = ["grpcio (>=1.38.0,<2.0dev)"] name = "google-cloud-resource-manager" version = "1.10.4" description = "Google Cloud Resource Manager API client library" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -1628,7 +1734,7 @@ files = [ ] [package.dependencies] -google-api-core = {version = ">=1.34.0,<2.0.0 || >=2.11.0,<3.0.0dev", extras = ["grpc"]} +google-api-core = {version = ">=1.34.0,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} grpc-google-iam-v1 = ">=0.12.4,<1.0.0dev" proto-plus = [ {version = ">=1.22.0,<2.0.0dev", markers = "python_version < \"3.11\""}, @@ -1640,7 +1746,6 @@ protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4 name = "google-cloud-storage" version = "2.13.0" description = "Google Cloud Storage API client library" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -1649,7 +1754,7 @@ files = [ ] [package.dependencies] -google-api-core = ">=1.31.5,<2.0.0 || >2.3.0,<3.0.0dev" +google-api-core = ">=1.31.5,<2.0.dev0 || 
>2.3.0,<3.0.0dev" google-auth = ">=2.23.3,<3.0dev" google-cloud-core = ">=2.3.0,<3.0dev" google-crc32c = ">=1.0,<2.0dev" @@ -1663,7 +1768,6 @@ protobuf = ["protobuf (<5.0.0dev)"] name = "google-crc32c" version = "1.5.0" description = "A python wrapper of the C library 'Google CRC32C'" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -1744,7 +1848,6 @@ testing = ["pytest"] name = "google-generativeai" version = "0.3.2" description = "Google Generative AI High level API client library and tools." -category = "main" optional = true python-versions = ">=3.9" files = [ @@ -1766,7 +1869,6 @@ dev = ["Pillow", "absl-py", "black", "ipython", "nose2", "pandas", "pytype", "py name = "google-resumable-media" version = "2.6.0" description = "Utilities for Google Media Downloads and Resumable Uploads" -category = "main" optional = true python-versions = ">= 3.7" files = [ @@ -1785,7 +1887,6 @@ requests = ["requests (>=2.18.0,<3.0.0dev)"] name = "googleapis-common-protos" version = "1.61.0" description = "Common protobufs used in Google APIs" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -1804,7 +1905,6 @@ grpc = ["grpcio (>=1.44.0,<2.0.0.dev0)"] name = "gpustat" version = "1.1.1" description = "An utility to monitor NVIDIA GPU status and usage" -category = "main" optional = true python-versions = ">=3.6" files = [ @@ -1824,7 +1924,6 @@ test = ["mockito (>=1.2.1)", "pytest (>=5.4.1)", "pytest-runner"] name = "greenlet" version = "3.0.1" description = "Lightweight in-process concurrent programming" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1895,7 +1994,6 @@ test = ["objgraph", "psutil"] name = "griffe" version = "0.36.9" description = "Signatures for entire Python programs. Extract the structure, the frame, the skeleton of your project, to generate API documentation or find breaking changes in your API." 
-category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -1910,7 +2008,6 @@ colorama = ">=0.4" name = "grpc-google-iam-v1" version = "0.12.6" description = "IAM API client library" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -1927,7 +2024,6 @@ protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.1 || >4.21.1,<4 name = "grpcio" version = "1.59.2" description = "HTTP/2-based RPC framework" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -1994,7 +2090,6 @@ protobuf = ["grpcio-tools (>=1.59.2)"] name = "grpcio-status" version = "1.59.2" description = "Status proto mapping for gRPC" -category = "main" optional = true python-versions = ">=3.6" files = [ @@ -2011,7 +2106,6 @@ protobuf = ">=4.21.6" name = "h11" version = "0.14.0" description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2023,7 +2117,6 @@ files = [ name = "html5lib" version = "1.1" description = "HTML parser based on the WHATWG HTML specification" -category = "main" optional = true python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -2045,7 +2138,6 @@ lxml = ["lxml"] name = "httpcore" version = "1.0.2" description = "A minimal low-level HTTP client." -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -2060,14 +2152,61 @@ h11 = ">=0.13,<0.15" [package.extras] asyncio = ["anyio (>=4.0,<5.0)"] http2 = ["h2 (>=3,<5)"] -socks = ["socksio (>=1.0.0,<2.0.0)"] +socks = ["socksio (==1.*)"] trio = ["trio (>=0.22.0,<0.23.0)"] +[[package]] +name = "httptools" +version = "0.6.1" +description = "A collection of framework independent HTTP protocol utils." 
+optional = true +python-versions = ">=3.8.0" +files = [ + {file = "httptools-0.6.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d2f6c3c4cb1948d912538217838f6e9960bc4a521d7f9b323b3da579cd14532f"}, + {file = "httptools-0.6.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:00d5d4b68a717765b1fabfd9ca755bd12bf44105eeb806c03d1962acd9b8e563"}, + {file = "httptools-0.6.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:639dc4f381a870c9ec860ce5c45921db50205a37cc3334e756269736ff0aac58"}, + {file = "httptools-0.6.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e57997ac7fb7ee43140cc03664de5f268813a481dff6245e0075925adc6aa185"}, + {file = "httptools-0.6.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0ac5a0ae3d9f4fe004318d64b8a854edd85ab76cffbf7ef5e32920faef62f142"}, + {file = "httptools-0.6.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:3f30d3ce413088a98b9db71c60a6ada2001a08945cb42dd65a9a9fe228627658"}, + {file = "httptools-0.6.1-cp310-cp310-win_amd64.whl", hash = "sha256:1ed99a373e327f0107cb513b61820102ee4f3675656a37a50083eda05dc9541b"}, + {file = "httptools-0.6.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:7a7ea483c1a4485c71cb5f38be9db078f8b0e8b4c4dc0210f531cdd2ddac1ef1"}, + {file = "httptools-0.6.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:85ed077c995e942b6f1b07583e4eb0a8d324d418954fc6af913d36db7c05a5a0"}, + {file = "httptools-0.6.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b0bb634338334385351a1600a73e558ce619af390c2b38386206ac6a27fecfc"}, + {file = "httptools-0.6.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7d9ceb2c957320def533671fc9c715a80c47025139c8d1f3797477decbc6edd2"}, + {file = "httptools-0.6.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = 
"sha256:4f0f8271c0a4db459f9dc807acd0eadd4839934a4b9b892f6f160e94da309837"}, + {file = "httptools-0.6.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:6a4f5ccead6d18ec072ac0b84420e95d27c1cdf5c9f1bc8fbd8daf86bd94f43d"}, + {file = "httptools-0.6.1-cp311-cp311-win_amd64.whl", hash = "sha256:5cceac09f164bcba55c0500a18fe3c47df29b62353198e4f37bbcc5d591172c3"}, + {file = "httptools-0.6.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:75c8022dca7935cba14741a42744eee13ba05db00b27a4b940f0d646bd4d56d0"}, + {file = "httptools-0.6.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:48ed8129cd9a0d62cf4d1575fcf90fb37e3ff7d5654d3a5814eb3d55f36478c2"}, + {file = "httptools-0.6.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6f58e335a1402fb5a650e271e8c2d03cfa7cea46ae124649346d17bd30d59c90"}, + {file = "httptools-0.6.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:93ad80d7176aa5788902f207a4e79885f0576134695dfb0fefc15b7a4648d503"}, + {file = "httptools-0.6.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9bb68d3a085c2174c2477eb3ffe84ae9fb4fde8792edb7bcd09a1d8467e30a84"}, + {file = "httptools-0.6.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:b512aa728bc02354e5ac086ce76c3ce635b62f5fbc32ab7082b5e582d27867bb"}, + {file = "httptools-0.6.1-cp312-cp312-win_amd64.whl", hash = "sha256:97662ce7fb196c785344d00d638fc9ad69e18ee4bfb4000b35a52efe5adcc949"}, + {file = "httptools-0.6.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:8e216a038d2d52ea13fdd9b9c9c7459fb80d78302b257828285eca1c773b99b3"}, + {file = "httptools-0.6.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:3e802e0b2378ade99cd666b5bffb8b2a7cc8f3d28988685dc300469ea8dd86cb"}, + {file = "httptools-0.6.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4bd3e488b447046e386a30f07af05f9b38d3d368d1f7b4d8f7e10af85393db97"}, + {file = 
"httptools-0.6.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe467eb086d80217b7584e61313ebadc8d187a4d95bb62031b7bab4b205c3ba3"}, + {file = "httptools-0.6.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:3c3b214ce057c54675b00108ac42bacf2ab8f85c58e3f324a4e963bbc46424f4"}, + {file = "httptools-0.6.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8ae5b97f690badd2ca27cbf668494ee1b6d34cf1c464271ef7bfa9ca6b83ffaf"}, + {file = "httptools-0.6.1-cp38-cp38-win_amd64.whl", hash = "sha256:405784577ba6540fa7d6ff49e37daf104e04f4b4ff2d1ac0469eaa6a20fde084"}, + {file = "httptools-0.6.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:95fb92dd3649f9cb139e9c56604cc2d7c7bf0fc2e7c8d7fbd58f96e35eddd2a3"}, + {file = "httptools-0.6.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:dcbab042cc3ef272adc11220517278519adf8f53fd3056d0e68f0a6f891ba94e"}, + {file = "httptools-0.6.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0cf2372e98406efb42e93bfe10f2948e467edfd792b015f1b4ecd897903d3e8d"}, + {file = "httptools-0.6.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:678fcbae74477a17d103b7cae78b74800d795d702083867ce160fc202104d0da"}, + {file = "httptools-0.6.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:e0b281cf5a125c35f7f6722b65d8542d2e57331be573e9e88bc8b0115c4a7a81"}, + {file = "httptools-0.6.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:95658c342529bba4e1d3d2b1a874db16c7cca435e8827422154c9da76ac4e13a"}, + {file = "httptools-0.6.1-cp39-cp39-win_amd64.whl", hash = "sha256:7ebaec1bf683e4bf5e9fbb49b8cc36da482033596a415b3e4ebab5a4c0d7ec5e"}, + {file = "httptools-0.6.1.tar.gz", hash = "sha256:c6e26c30455600b95d94b1b836085138e82f177351454ee841c148f93a9bad5a"}, +] + +[package.extras] +test = ["Cython (>=0.29.24,<0.30.0)"] + [[package]] name = "httpx" version = "0.25.1" description = "The next generation 
HTTP client." -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -2084,15 +2223,14 @@ sniffio = "*" [package.extras] brotli = ["brotli", "brotlicffi"] -cli = ["click (>=8.0.0,<9.0.0)", "pygments (>=2.0.0,<3.0.0)", "rich (>=10,<14)"] +cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] http2 = ["h2 (>=3,<5)"] -socks = ["socksio (>=1.0.0,<2.0.0)"] +socks = ["socksio (==1.*)"] [[package]] name = "huggingface-hub" version = "0.18.0" description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" -category = "main" optional = true python-versions = ">=3.8.0" files = [ @@ -2122,11 +2260,24 @@ testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "gradio", "jed torch = ["torch"] typing = ["pydantic (<2.0)", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3"] +[[package]] +name = "humanfriendly" +version = "10.0" +description = "Human friendly output for text interfaces using Python" +optional = true +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +files = [ + {file = "humanfriendly-10.0-py2.py3-none-any.whl", hash = "sha256:1697e1a8a8f550fd43c2865cd84542fc175a61dcb779b6fee18cf6b6ccba1477"}, + {file = "humanfriendly-10.0.tar.gz", hash = "sha256:6b0b831ce8f15f7300721aa49829fc4e83921a9a301cc7f606be6686a2288ddc"}, +] + +[package.dependencies] +pyreadline3 = {version = "*", markers = "sys_platform == \"win32\" and python_version >= \"3.8\""} + [[package]] name = "identify" version = "2.5.31" description = "File identification library for Python" -category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -2141,7 +2292,6 @@ license = ["ukkonen"] name = "idna" version = "3.4" description = "Internationalized Domain Names in Applications (IDNA)" -category = "main" optional = false python-versions = ">=3.5" files = [ @@ -2153,7 +2303,6 @@ files = [ name = "importlib-metadata" version = "6.8.0" 
description = "Read metadata from Python packages" -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -2173,7 +2322,6 @@ testing = ["flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs name = "importlib-resources" version = "6.1.0" description = "Read resources from Python packages" -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -2192,7 +2340,6 @@ testing = ["pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", name = "iniconfig" version = "2.0.0" description = "brain-dead simple config-ini parsing" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2204,7 +2351,6 @@ files = [ name = "ipython" version = "8.17.2" description = "IPython: Productive Interactive Computing" -category = "main" optional = false python-versions = ">=3.9" files = [ @@ -2243,7 +2389,6 @@ test-extra = ["curio", "matplotlib (!=3.2.0)", "nbformat", "numpy (>=1.22)", "pa name = "itsdangerous" version = "2.1.2" description = "Safely pass data to untrusted environments and back." -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -2255,7 +2400,6 @@ files = [ name = "jedi" version = "0.19.1" description = "An autocompletion tool for Python that can be used for text editors." -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -2273,14 +2417,13 @@ testing = ["Django", "attrs", "colorama", "docopt", "pytest (<7.0.0)"] [[package]] name = "jinja2" -version = "3.1.2" +version = "3.1.3" description = "A very fast and expressive template engine." 
-category = "main" optional = false python-versions = ">=3.7" files = [ - {file = "Jinja2-3.1.2-py3-none-any.whl", hash = "sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61"}, - {file = "Jinja2-3.1.2.tar.gz", hash = "sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852"}, + {file = "Jinja2-3.1.3-py3-none-any.whl", hash = "sha256:7d6d50dd97d52cbc355597bd845fabfbac3f551e1f99619e39a35ce8c370b5fa"}, + {file = "Jinja2-3.1.3.tar.gz", hash = "sha256:ac8bd6544d4bb2c9792bf3a159e80bba8fda7f07e81bc3aed565432d5925ba90"}, ] [package.dependencies] @@ -2293,7 +2436,6 @@ i18n = ["Babel (>=2.7)"] name = "jinxed" version = "1.2.1" description = "Jinxed Terminal Library" -category = "main" optional = true python-versions = "*" files = [ @@ -2308,7 +2450,6 @@ ansicon = {version = "*", markers = "platform_system == \"Windows\""} name = "joblib" version = "1.3.2" description = "Lightweight pipelining with Python functions" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -2320,7 +2461,6 @@ files = [ name = "jsonpatch" version = "1.33" description = "Apply JSON-Patches (RFC 6902)" -category = "main" optional = true python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" files = [ @@ -2335,7 +2475,6 @@ jsonpointer = ">=1.9" name = "jsonpointer" version = "2.4" description = "Identify specific nodes in a JSON document (RFC 6901)" -category = "main" optional = true python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" files = [ @@ -2347,7 +2486,6 @@ files = [ name = "jsonschema" version = "4.19.2" description = "An implementation of JSON Schema validation for Python" -category = "main" optional = true python-versions = ">=3.8" files = [ @@ -2369,7 +2507,6 @@ format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339- name = "jsonschema-specifications" version = "2023.7.1" description = "The JSON Schema meta-schemas and vocabularies, 
exposed as a Registry" -category = "main" optional = true python-versions = ">=3.8" files = [ @@ -2384,7 +2521,6 @@ referencing = ">=0.28.0" name = "kaleido" version = "0.2.0" description = "Static image export for web-based visualization libraries with zero dependencies" -category = "main" optional = true python-versions = "*" files = [ @@ -2400,7 +2536,6 @@ files = [ name = "kiwisolver" version = "1.4.5" description = "A fast implementation of the Cassowary constraint solver" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2510,11 +2645,36 @@ files = [ {file = "kiwisolver-1.4.5.tar.gz", hash = "sha256:e57e563a57fb22a142da34f38acc2fc1a5c864bc29ca1517a88abc963e60d6ec"}, ] +[[package]] +name = "kubernetes" +version = "29.0.0" +description = "Kubernetes python client" +optional = true +python-versions = ">=3.6" +files = [ + {file = "kubernetes-29.0.0-py2.py3-none-any.whl", hash = "sha256:ab8cb0e0576ccdfb71886366efb102c6a20f268d817be065ce7f9909c631e43e"}, + {file = "kubernetes-29.0.0.tar.gz", hash = "sha256:c4812e227ae74d07d53c88293e564e54b850452715a59a927e7e1bc6b9a60459"}, +] + +[package.dependencies] +certifi = ">=14.05.14" +google-auth = ">=1.0.1" +oauthlib = ">=3.2.2" +python-dateutil = ">=2.5.3" +pyyaml = ">=5.4.1" +requests = "*" +requests-oauthlib = "*" +six = ">=1.9.0" +urllib3 = ">=1.24.2" +websocket-client = ">=0.32.0,<0.40.0 || >0.40.0,<0.41.dev0 || >=0.43.dev0" + +[package.extras] +adal = ["adal (>=1.0.2)"] + [[package]] name = "langchain" version = "0.1.9" description = "Building applications with LLMs through composability" -category = "main" optional = true python-versions = ">=3.8.1,<4.0" files = [ @@ -2555,7 +2715,6 @@ text-helpers = ["chardet (>=5.1.0,<6.0.0)"] name = "langchain-community" version = "0.0.24" description = "Community contributed LangChain integrations." 
-category = "main" optional = true python-versions = ">=3.8.1,<4.0" files = [ @@ -2582,7 +2741,6 @@ extended-testing = ["aiosqlite (>=0.19.0,<0.20.0)", "aleph-alpha-client (>=2.15. name = "langchain-core" version = "0.1.26" description = "Building applications with LLMs through composability" -category = "main" optional = true python-versions = ">=3.8.1,<4.0" files = [ @@ -2605,14 +2763,13 @@ extended-testing = ["jinja2 (>=3,<4)"] [[package]] name = "langsmith" -version = "0.1.7" +version = "0.1.8" description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." -category = "main" optional = true python-versions = ">=3.8.1,<4.0" files = [ - {file = "langsmith-0.1.7-py3-none-any.whl", hash = "sha256:391320049d5e4e630b92b517d144e181caa21b8d80192d8d4bfd24be51aab40c"}, - {file = "langsmith-0.1.7.tar.gz", hash = "sha256:764a425d332539921d7e861c0276e4c46ba7af0f6cb47b5328290929218d41a8"}, + {file = "langsmith-0.1.8-py3-none-any.whl", hash = "sha256:f4320fd80ec9d311a648e7d4c44e0814e6e5454772c5026f40db0307bc07e287"}, + {file = "langsmith-0.1.8.tar.gz", hash = "sha256:ab5f1cdfb7d418109ea506d41928fb8708547db2f6c7f7da7cfe997f3c55767b"}, ] [package.dependencies] @@ -2624,7 +2781,6 @@ requests = ">=2,<3" name = "lxml" version = "4.9.3" description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API." -category = "main" optional = true python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, != 3.4.*" files = [ @@ -2732,7 +2888,6 @@ source = ["Cython (>=0.29.35)"] name = "lz4" version = "4.3.2" description = "LZ4 Bindings for Python" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -2782,7 +2937,6 @@ tests = ["psutil", "pytest (!=3.3.0)", "pytest-cov"] name = "mako" version = "1.2.4" description = "A super-fast templating language that borrows the best ideas from the existing templating languages." 
-category = "main" optional = true python-versions = ">=3.7" files = [ @@ -2802,7 +2956,6 @@ testing = ["pytest"] name = "markdown" version = "3.5.1" description = "Python implementation of John Gruber's Markdown." -category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -2821,7 +2974,6 @@ testing = ["coverage", "pyyaml"] name = "markdown-include" version = "0.6.0" description = "This is an extension to Python-Markdown which provides an \"include\" function, similar to that found in LaTeX (and also the C pre-processor and Fortran). I originally wrote it for my FORD Fortran auto-documentation generator." -category = "dev" optional = false python-versions = "*" files = [ @@ -2835,7 +2987,6 @@ markdown = "*" name = "markdown-it-py" version = "3.0.0" description = "Python port of markdown-it. Markdown parsing, done right!" -category = "main" optional = true python-versions = ">=3.8" files = [ @@ -2860,7 +3011,6 @@ testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] name = "markupsafe" version = "2.1.3" description = "Safely add untrusted strings to HTML/XML markup." -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2920,7 +3070,6 @@ files = [ name = "marshmallow" version = "3.20.1" description = "A lightweight library for converting complex datatypes to and from native Python datatypes." 
-category = "main" optional = true python-versions = ">=3.8" files = [ @@ -2941,7 +3090,6 @@ tests = ["pytest", "pytz", "simplejson"] name = "marshmallow-enum" version = "1.5.1" description = "Enum field for Marshmallow" -category = "main" optional = true python-versions = "*" files = [ @@ -2956,7 +3104,6 @@ marshmallow = ">=2.0.0" name = "matplotlib" version = "3.8.1" description = "Python plotting package" -category = "main" optional = false python-versions = ">=3.9" files = [ @@ -3006,7 +3153,6 @@ python-dateutil = ">=2.7" name = "matplotlib-inline" version = "0.1.6" description = "Inline Matplotlib backend for Jupyter" -category = "main" optional = false python-versions = ">=3.5" files = [ @@ -3021,7 +3167,6 @@ traitlets = "*" name = "mdurl" version = "0.1.2" description = "Markdown URL utilities" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -3033,7 +3178,6 @@ files = [ name = "mergedeep" version = "1.3.4" description = "A deep merge function for 🐍." -category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -3045,7 +3189,6 @@ files = [ name = "mkdocs" version = "1.5.3" description = "Project documentation with Markdown." -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3077,7 +3220,6 @@ min-versions = ["babel (==2.9.0)", "click (==7.0)", "colorama (==0.4)", "ghp-imp name = "mkdocs-autorefs" version = "0.5.0" description = "Automatically link across pages in MkDocs." -category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -3093,7 +3235,6 @@ mkdocs = ">=1.1" name = "mkdocstrings" version = "0.23.0" description = "Automatic documentation from sources, for MkDocs." -category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -3120,7 +3261,6 @@ python-legacy = ["mkdocstrings-python-legacy (>=0.2.1)"] name = "mkdocstrings-python" version = "1.7.2" description = "A Python handler for mkdocstrings." 
-category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -3132,11 +3272,102 @@ files = [ griffe = ">=0.35" mkdocstrings = ">=0.20" +[[package]] +name = "mmh3" +version = "4.1.0" +description = "Python extension for MurmurHash (MurmurHash3), a set of fast and robust hash functions." +optional = true +python-versions = "*" +files = [ + {file = "mmh3-4.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:be5ac76a8b0cd8095784e51e4c1c9c318c19edcd1709a06eb14979c8d850c31a"}, + {file = "mmh3-4.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:98a49121afdfab67cd80e912b36404139d7deceb6773a83620137aaa0da5714c"}, + {file = "mmh3-4.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5259ac0535874366e7d1a5423ef746e0d36a9e3c14509ce6511614bdc5a7ef5b"}, + {file = "mmh3-4.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5950827ca0453a2be357696da509ab39646044e3fa15cad364eb65d78797437"}, + {file = "mmh3-4.1.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1dd0f652ae99585b9dd26de458e5f08571522f0402155809fd1dc8852a613a39"}, + {file = "mmh3-4.1.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:99d25548070942fab1e4a6f04d1626d67e66d0b81ed6571ecfca511f3edf07e6"}, + {file = "mmh3-4.1.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:53db8d9bad3cb66c8f35cbc894f336273f63489ce4ac416634932e3cbe79eb5b"}, + {file = "mmh3-4.1.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75da0f615eb55295a437264cc0b736753f830b09d102aa4c2a7d719bc445ec05"}, + {file = "mmh3-4.1.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:b926b07fd678ea84b3a2afc1fa22ce50aeb627839c44382f3d0291e945621e1a"}, + {file = "mmh3-4.1.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:c5b053334f9b0af8559d6da9dc72cef0a65b325ebb3e630c680012323c950bb6"}, + {file = 
"mmh3-4.1.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:5bf33dc43cd6de2cb86e0aa73a1cc6530f557854bbbe5d59f41ef6de2e353d7b"}, + {file = "mmh3-4.1.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:fa7eacd2b830727ba3dd65a365bed8a5c992ecd0c8348cf39a05cc77d22f4970"}, + {file = "mmh3-4.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:42dfd6742b9e3eec599f85270617debfa0bbb913c545bb980c8a4fa7b2d047da"}, + {file = "mmh3-4.1.0-cp310-cp310-win32.whl", hash = "sha256:2974ad343f0d39dcc88e93ee6afa96cedc35a9883bc067febd7ff736e207fa47"}, + {file = "mmh3-4.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:74699a8984ded645c1a24d6078351a056f5a5f1fe5838870412a68ac5e28d865"}, + {file = "mmh3-4.1.0-cp310-cp310-win_arm64.whl", hash = "sha256:f0dc874cedc23d46fc488a987faa6ad08ffa79e44fb08e3cd4d4cf2877c00a00"}, + {file = "mmh3-4.1.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:3280a463855b0eae64b681cd5b9ddd9464b73f81151e87bb7c91a811d25619e6"}, + {file = "mmh3-4.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:97ac57c6c3301769e757d444fa7c973ceb002cb66534b39cbab5e38de61cd896"}, + {file = "mmh3-4.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a7b6502cdb4dbd880244818ab363c8770a48cdccecf6d729ade0241b736b5ec0"}, + {file = "mmh3-4.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:52ba2da04671a9621580ddabf72f06f0e72c1c9c3b7b608849b58b11080d8f14"}, + {file = "mmh3-4.1.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5a5fef4c4ecc782e6e43fbeab09cff1bac82c998a1773d3a5ee6a3605cde343e"}, + {file = "mmh3-4.1.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5135358a7e00991f73b88cdc8eda5203bf9de22120d10a834c5761dbeb07dd13"}, + {file = "mmh3-4.1.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cff9ae76a54f7c6fe0167c9c4028c12c1f6de52d68a31d11b6790bb2ae685560"}, + {file = 
"mmh3-4.1.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f6f02576a4d106d7830ca90278868bf0983554dd69183b7bbe09f2fcd51cf54f"}, + {file = "mmh3-4.1.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:073d57425a23721730d3ff5485e2da489dd3c90b04e86243dd7211f889898106"}, + {file = "mmh3-4.1.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:71e32ddec7f573a1a0feb8d2cf2af474c50ec21e7a8263026e8d3b4b629805db"}, + {file = "mmh3-4.1.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:7cbb20b29d57e76a58b40fd8b13a9130db495a12d678d651b459bf61c0714cea"}, + {file = "mmh3-4.1.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:a42ad267e131d7847076bb7e31050f6c4378cd38e8f1bf7a0edd32f30224d5c9"}, + {file = "mmh3-4.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:4a013979fc9390abadc445ea2527426a0e7a4495c19b74589204f9b71bcaafeb"}, + {file = "mmh3-4.1.0-cp311-cp311-win32.whl", hash = "sha256:1d3b1cdad7c71b7b88966301789a478af142bddcb3a2bee563f7a7d40519a00f"}, + {file = "mmh3-4.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:0dc6dc32eb03727467da8e17deffe004fbb65e8b5ee2b502d36250d7a3f4e2ec"}, + {file = "mmh3-4.1.0-cp311-cp311-win_arm64.whl", hash = "sha256:9ae3a5c1b32dda121c7dc26f9597ef7b01b4c56a98319a7fe86c35b8bc459ae6"}, + {file = "mmh3-4.1.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0033d60c7939168ef65ddc396611077a7268bde024f2c23bdc283a19123f9e9c"}, + {file = "mmh3-4.1.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d6af3e2287644b2b08b5924ed3a88c97b87b44ad08e79ca9f93d3470a54a41c5"}, + {file = "mmh3-4.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d82eb4defa245e02bb0b0dc4f1e7ee284f8d212633389c91f7fba99ba993f0a2"}, + {file = "mmh3-4.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ba245e94b8d54765e14c2d7b6214e832557e7856d5183bc522e17884cab2f45d"}, + {file = "mmh3-4.1.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:bb04e2feeabaad6231e89cd43b3d01a4403579aa792c9ab6fdeef45cc58d4ec0"}, + {file = "mmh3-4.1.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1e3b1a27def545ce11e36158ba5d5390cdbc300cfe456a942cc89d649cf7e3b2"}, + {file = "mmh3-4.1.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ce0ab79ff736d7044e5e9b3bfe73958a55f79a4ae672e6213e92492ad5e734d5"}, + {file = "mmh3-4.1.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b02268be6e0a8eeb8a924d7db85f28e47344f35c438c1e149878bb1c47b1cd3"}, + {file = "mmh3-4.1.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:deb887f5fcdaf57cf646b1e062d56b06ef2f23421c80885fce18b37143cba828"}, + {file = "mmh3-4.1.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:99dd564e9e2b512eb117bd0cbf0f79a50c45d961c2a02402787d581cec5448d5"}, + {file = "mmh3-4.1.0-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:08373082dfaa38fe97aa78753d1efd21a1969e51079056ff552e687764eafdfe"}, + {file = "mmh3-4.1.0-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:54b9c6a2ea571b714e4fe28d3e4e2db37abfd03c787a58074ea21ee9a8fd1740"}, + {file = "mmh3-4.1.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a7b1edf24c69e3513f879722b97ca85e52f9032f24a52284746877f6a7304086"}, + {file = "mmh3-4.1.0-cp312-cp312-win32.whl", hash = "sha256:411da64b951f635e1e2284b71d81a5a83580cea24994b328f8910d40bed67276"}, + {file = "mmh3-4.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:bebc3ecb6ba18292e3d40c8712482b4477abd6981c2ebf0e60869bd90f8ac3a9"}, + {file = "mmh3-4.1.0-cp312-cp312-win_arm64.whl", hash = "sha256:168473dd608ade6a8d2ba069600b35199a9af837d96177d3088ca91f2b3798e3"}, + {file = "mmh3-4.1.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:372f4b7e1dcde175507640679a2a8790185bb71f3640fc28a4690f73da986a3b"}, + {file = "mmh3-4.1.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = 
"sha256:438584b97f6fe13e944faf590c90fc127682b57ae969f73334040d9fa1c7ffa5"}, + {file = "mmh3-4.1.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6e27931b232fc676675fac8641c6ec6b596daa64d82170e8597f5a5b8bdcd3b6"}, + {file = "mmh3-4.1.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:571a92bad859d7b0330e47cfd1850b76c39b615a8d8e7aa5853c1f971fd0c4b1"}, + {file = "mmh3-4.1.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4a69d6afe3190fa08f9e3a58e5145549f71f1f3fff27bd0800313426929c7068"}, + {file = "mmh3-4.1.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:afb127be0be946b7630220908dbea0cee0d9d3c583fa9114a07156f98566dc28"}, + {file = "mmh3-4.1.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:940d86522f36348ef1a494cbf7248ab3f4a1638b84b59e6c9e90408bd11ad729"}, + {file = "mmh3-4.1.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b3dcccc4935686619a8e3d1f7b6e97e3bd89a4a796247930ee97d35ea1a39341"}, + {file = "mmh3-4.1.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:01bb9b90d61854dfc2407c5e5192bfb47222d74f29d140cb2dd2a69f2353f7cc"}, + {file = "mmh3-4.1.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:bcb1b8b951a2c0b0fb8a5426c62a22557e2ffc52539e0a7cc46eb667b5d606a9"}, + {file = "mmh3-4.1.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:6477a05d5e5ab3168e82e8b106e316210ac954134f46ec529356607900aea82a"}, + {file = "mmh3-4.1.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:da5892287e5bea6977364b15712a2573c16d134bc5fdcdd4cf460006cf849278"}, + {file = "mmh3-4.1.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:99180d7fd2327a6fffbaff270f760576839dc6ee66d045fa3a450f3490fda7f5"}, + {file = "mmh3-4.1.0-cp38-cp38-win32.whl", hash = "sha256:9b0d4f3949913a9f9a8fb1bb4cc6ecd52879730aab5ff8c5a3d8f5b593594b73"}, + {file = "mmh3-4.1.0-cp38-cp38-win_amd64.whl", hash = 
"sha256:598c352da1d945108aee0c3c3cfdd0e9b3edef74108f53b49d481d3990402169"}, + {file = "mmh3-4.1.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:475d6d1445dd080f18f0f766277e1237fa2914e5fe3307a3b2a3044f30892103"}, + {file = "mmh3-4.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5ca07c41e6a2880991431ac717c2a049056fff497651a76e26fc22224e8b5732"}, + {file = "mmh3-4.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0ebe052fef4bbe30c0548d12ee46d09f1b69035ca5208a7075e55adfe091be44"}, + {file = "mmh3-4.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eaefd42e85afb70f2b855a011f7b4d8a3c7e19c3f2681fa13118e4d8627378c5"}, + {file = "mmh3-4.1.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ac0ae43caae5a47afe1b63a1ae3f0986dde54b5fb2d6c29786adbfb8edc9edfb"}, + {file = "mmh3-4.1.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6218666f74c8c013c221e7f5f8a693ac9cf68e5ac9a03f2373b32d77c48904de"}, + {file = "mmh3-4.1.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ac59294a536ba447b5037f62d8367d7d93b696f80671c2c45645fa9f1109413c"}, + {file = "mmh3-4.1.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:086844830fcd1e5c84fec7017ea1ee8491487cfc877847d96f86f68881569d2e"}, + {file = "mmh3-4.1.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:e42b38fad664f56f77f6fbca22d08450f2464baa68acdbf24841bf900eb98e87"}, + {file = "mmh3-4.1.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:d08b790a63a9a1cde3b5d7d733ed97d4eb884bfbc92f075a091652d6bfd7709a"}, + {file = "mmh3-4.1.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:73ea4cc55e8aea28c86799ecacebca09e5f86500414870a8abaedfcbaf74d288"}, + {file = "mmh3-4.1.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:f90938ff137130e47bcec8dc1f4ceb02f10178c766e2ef58a9f657ff1f62d124"}, + {file = 
"mmh3-4.1.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:aa1f13e94b8631c8cd53259250556edcf1de71738936b60febba95750d9632bd"}, + {file = "mmh3-4.1.0-cp39-cp39-win32.whl", hash = "sha256:a3b680b471c181490cf82da2142029edb4298e1bdfcb67c76922dedef789868d"}, + {file = "mmh3-4.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:fefef92e9c544a8dbc08f77a8d1b6d48006a750c4375bbcd5ff8199d761e263b"}, + {file = "mmh3-4.1.0-cp39-cp39-win_arm64.whl", hash = "sha256:8e2c1f6a2b41723a4f82bd5a762a777836d29d664fc0095f17910bea0adfd4a6"}, + {file = "mmh3-4.1.0.tar.gz", hash = "sha256:a1cf25348b9acd229dda464a094d6170f47d2850a1fcb762a3b6172d2ce6ca4a"}, +] + +[package.extras] +test = ["mypy (>=1.0)", "pytest (>=7.0.0)"] + [[package]] name = "modin" version = "0.18.1" description = "Modin: Make your pandas code run faster by changing one line of code." -category = "main" optional = true python-versions = ">=3.8" files = [ @@ -3162,11 +3393,38 @@ spreadsheet = ["modin-spreadsheet (>=0.1.0)"] sql = ["dfsql (>=0.4.2)", "pyparsing (<=2.4.7)"] unidist = ["unidist[mpi] (>=0.2.1)"] +[[package]] +name = "monotonic" +version = "1.6" +description = "An implementation of time.monotonic() for Python 2 & < 3.3" +optional = true +python-versions = "*" +files = [ + {file = "monotonic-1.6-py2.py3-none-any.whl", hash = "sha256:68687e19a14f11f26d140dd5c86f3dba4bf5df58003000ed467e0e2a69bca96c"}, + {file = "monotonic-1.6.tar.gz", hash = "sha256:3a55207bcfed53ddd5c5bae174524062935efed17792e9de2ad0205ce9ad63f7"}, +] + +[[package]] +name = "mpmath" +version = "1.3.0" +description = "Python library for arbitrary-precision floating-point arithmetic" +optional = true +python-versions = "*" +files = [ + {file = "mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c"}, + {file = "mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f"}, +] + +[package.extras] +develop = ["codecov", "pycodestyle", "pytest (>=4.6)", 
"pytest-cov", "wheel"] +docs = ["sphinx"] +gmpy = ["gmpy2 (>=2.1.0a4)"] +tests = ["pytest (>=4.6)"] + [[package]] name = "msgpack" version = "1.0.7" description = "MessagePack serializer" -category = "main" optional = true python-versions = ">=3.8" files = [ @@ -3232,7 +3490,6 @@ files = [ name = "multidict" version = "6.0.4" description = "multidict implementation" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -3316,7 +3573,6 @@ files = [ name = "multitasking" version = "0.0.11" description = "Non-blocking Python methods using decorators" -category = "main" optional = true python-versions = "*" files = [ @@ -3328,7 +3584,6 @@ files = [ name = "mypy-extensions" version = "1.0.0" description = "Type system extensions for programs checked with the mypy type checker." -category = "main" optional = true python-versions = ">=3.5" files = [ @@ -3340,7 +3595,6 @@ files = [ name = "nodeenv" version = "1.8.0" description = "Node.js virtual environment builder" -category = "dev" optional = false python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*" files = [ @@ -3355,7 +3609,6 @@ setuptools = "*" name = "numpy" version = "1.25.2" description = "Fundamental package for array computing in Python" -category = "main" optional = false python-versions = ">=3.9" files = [ @@ -3390,7 +3643,6 @@ files = [ name = "nvidia-ml-py" version = "12.535.133" description = "Python Bindings for the NVIDIA Management Library" -category = "main" optional = true python-versions = "*" files = [ @@ -3402,7 +3654,6 @@ files = [ name = "oauthlib" version = "3.2.2" description = "A generic, spec-compliant, thorough implementation of the OAuth request-signing logic" -category = "main" optional = true python-versions = ">=3.6" files = [ @@ -3415,11 +3666,51 @@ rsa = ["cryptography (>=3.0.0)"] signals = ["blinker (>=1.4.0)"] signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"] +[[package]] +name = "onnxruntime" +version = "1.16.3" 
+description = "ONNX Runtime is a runtime accelerator for Machine Learning models" +optional = true +python-versions = "*" +files = [ + {file = "onnxruntime-1.16.3-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:3bc41f323ac77acfed190be8ffdc47a6a75e4beeb3473fbf55eeb075ccca8df2"}, + {file = "onnxruntime-1.16.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:212741b519ee61a4822c79c47147d63a8b0ffde25cd33988d3d7be9fbd51005d"}, + {file = "onnxruntime-1.16.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5f91f5497fe3df4ceee2f9e66c6148d9bfeb320cd6a71df361c66c5b8bac985a"}, + {file = "onnxruntime-1.16.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ef2b1fc269cabd27f129fb9058917d6fdc89b188c49ed8700f300b945c81f889"}, + {file = "onnxruntime-1.16.3-cp310-cp310-win32.whl", hash = "sha256:f36b56a593b49a3c430be008c2aea6658d91a3030115729609ec1d5ffbaab1b6"}, + {file = "onnxruntime-1.16.3-cp310-cp310-win_amd64.whl", hash = "sha256:3c467eaa3d2429c026b10c3d17b78b7f311f718ef9d2a0d6938e5c3c2611b0cf"}, + {file = "onnxruntime-1.16.3-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:a225bb683991001d111f75323d355b3590e75e16b5e0f07a0401e741a0143ea1"}, + {file = "onnxruntime-1.16.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9aded21fe3d898edd86be8aa2eb995aa375e800ad3dfe4be9f618a20b8ee3630"}, + {file = "onnxruntime-1.16.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:00cccc37a5195c8fca5011b9690b349db435986bd508eb44c9fce432da9228a4"}, + {file = "onnxruntime-1.16.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3e253e572021563226a86f1c024f8f70cdae28f2fb1cc8c3a9221e8b1ce37db5"}, + {file = "onnxruntime-1.16.3-cp311-cp311-win32.whl", hash = "sha256:a82a8f0b4c978d08f9f5c7a6019ae51151bced9fd91e5aaa0c20a9e4ac7a60b6"}, + {file = "onnxruntime-1.16.3-cp311-cp311-win_amd64.whl", hash = "sha256:78d81d9af457a1dc90db9a7da0d09f3ccb1288ea1236c6ab19f0ca61f3eee2d3"}, + {file = 
"onnxruntime-1.16.3-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:04ebcd29c20473596a1412e471524b2fb88d55e6301c40b98dd2407b5911595f"}, + {file = "onnxruntime-1.16.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:9996bab0f202a6435ab867bc55598f15210d0b72794d5de83712b53d564084ae"}, + {file = "onnxruntime-1.16.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5b8f5083f903408238883821dd8c775f8120cb4a604166dbdabe97f4715256d5"}, + {file = "onnxruntime-1.16.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c2dcf1b70f8434abb1116fe0975c00e740722aaf321997195ea3618cc00558e"}, + {file = "onnxruntime-1.16.3-cp38-cp38-win32.whl", hash = "sha256:d4a0151e1accd04da6711f6fd89024509602f82c65a754498e960b032359b02d"}, + {file = "onnxruntime-1.16.3-cp38-cp38-win_amd64.whl", hash = "sha256:e8aa5bba78afbd4d8a2654b14ec7462ff3ce4a6aad312a3c2d2c2b65009f2541"}, + {file = "onnxruntime-1.16.3-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:6829dc2a79d48c911fedaf4c0f01e03c86297d32718a3fdee7a282766dfd282a"}, + {file = "onnxruntime-1.16.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:76f876c53bfa912c6c242fc38213a6f13f47612d4360bc9d599bd23753e53161"}, + {file = "onnxruntime-1.16.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4137e5d443e2dccebe5e156a47f1d6d66f8077b03587c35f11ee0c7eda98b533"}, + {file = "onnxruntime-1.16.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c56695c1a343c7c008b647fff3df44da63741fbe7b6003ef576758640719be7b"}, + {file = "onnxruntime-1.16.3-cp39-cp39-win32.whl", hash = "sha256:985a029798744ce4743fcf8442240fed35c8e4d4d30ec7d0c2cdf1388cd44408"}, + {file = "onnxruntime-1.16.3-cp39-cp39-win_amd64.whl", hash = "sha256:28ff758b17ce3ca6bcad3d936ec53bd7f5482e7630a13f6dcae518eba8f71d85"}, +] + +[package.dependencies] +coloredlogs = "*" +flatbuffers = "*" +numpy = ">=1.21.6" +packaging = "*" +protobuf = "*" +sympy = "*" + [[package]] name = "openai" version = 
"1.3.0" description = "The official Python library for the openai API" -category = "main" optional = false python-versions = ">=3.7.1" files = [ @@ -3442,7 +3733,6 @@ datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"] name = "opencensus" version = "0.11.4" description = "A stats collection and distributed tracing framework" -category = "main" optional = true python-versions = "*" files = [ @@ -3459,7 +3749,6 @@ six = ">=1.16,<2.0" name = "opencensus-context" version = "0.1.3" description = "OpenCensus Runtime Context" -category = "main" optional = true python-versions = "*" files = [ @@ -3471,7 +3760,6 @@ files = [ name = "openpyxl" version = "3.1.2" description = "A Python library to read/write Excel 2010 xlsx/xlsm files" -category = "main" optional = true python-versions = ">=3.6" files = [ @@ -3482,11 +3770,176 @@ files = [ [package.dependencies] et-xmlfile = "*" +[[package]] +name = "opentelemetry-api" +version = "1.22.0" +description = "OpenTelemetry Python API" +optional = true +python-versions = ">=3.7" +files = [ + {file = "opentelemetry_api-1.22.0-py3-none-any.whl", hash = "sha256:43621514301a7e9f5d06dd8013a1b450f30c2e9372b8e30aaeb4562abf2ce034"}, + {file = "opentelemetry_api-1.22.0.tar.gz", hash = "sha256:15ae4ca925ecf9cfdfb7a709250846fbb08072260fca08ade78056c502b86bed"}, +] + +[package.dependencies] +deprecated = ">=1.2.6" +importlib-metadata = ">=6.0,<7.0" + +[[package]] +name = "opentelemetry-exporter-otlp-proto-common" +version = "1.22.0" +description = "OpenTelemetry Protobuf encoding" +optional = true +python-versions = ">=3.7" +files = [ + {file = "opentelemetry_exporter_otlp_proto_common-1.22.0-py3-none-any.whl", hash = "sha256:3f2538bec5312587f8676c332b3747f54c89fe6364803a807e217af4603201fa"}, + {file = "opentelemetry_exporter_otlp_proto_common-1.22.0.tar.gz", hash = "sha256:71ae2f81bc6d6fe408d06388826edc8933759b2ca3a97d24054507dc7cfce52d"}, +] + +[package.dependencies] +backoff = {version = ">=1.10.0,<3.0.0", markers = 
"python_version >= \"3.7\""} +opentelemetry-proto = "1.22.0" + +[[package]] +name = "opentelemetry-exporter-otlp-proto-grpc" +version = "1.22.0" +description = "OpenTelemetry Collector Protobuf over gRPC Exporter" +optional = true +python-versions = ">=3.7" +files = [ + {file = "opentelemetry_exporter_otlp_proto_grpc-1.22.0-py3-none-any.whl", hash = "sha256:b5bcadc129272004316a455e9081216d3380c1fc2231a928ea6a70aa90e173fb"}, + {file = "opentelemetry_exporter_otlp_proto_grpc-1.22.0.tar.gz", hash = "sha256:1e0e5aa4bbabc74942f06f268deffd94851d12a8dc30b02527472ef1729fe5b1"}, +] + +[package.dependencies] +backoff = {version = ">=1.10.0,<3.0.0", markers = "python_version >= \"3.7\""} +deprecated = ">=1.2.6" +googleapis-common-protos = ">=1.52,<2.0" +grpcio = ">=1.0.0,<2.0.0" +opentelemetry-api = ">=1.15,<2.0" +opentelemetry-exporter-otlp-proto-common = "1.22.0" +opentelemetry-proto = "1.22.0" +opentelemetry-sdk = ">=1.22.0,<1.23.0" + +[package.extras] +test = ["pytest-grpc"] + +[[package]] +name = "opentelemetry-instrumentation" +version = "0.43b0" +description = "Instrumentation Tools & Auto Instrumentation for OpenTelemetry Python" +optional = true +python-versions = ">=3.7" +files = [ + {file = "opentelemetry_instrumentation-0.43b0-py3-none-any.whl", hash = "sha256:0ff1334d7e359e27640e9d420024efeb73eacae464309c2e14ede7ba6c93967e"}, + {file = "opentelemetry_instrumentation-0.43b0.tar.gz", hash = "sha256:c3755da6c4be8033be0216d0501e11f4832690f4e2eca5a3576fbf113498f0f6"}, +] + +[package.dependencies] +opentelemetry-api = ">=1.4,<2.0" +setuptools = ">=16.0" +wrapt = ">=1.0.0,<2.0.0" + +[[package]] +name = "opentelemetry-instrumentation-asgi" +version = "0.43b0" +description = "ASGI instrumentation for OpenTelemetry" +optional = true +python-versions = ">=3.7" +files = [ + {file = "opentelemetry_instrumentation_asgi-0.43b0-py3-none-any.whl", hash = "sha256:1f593829fa039e9367820736fb063e92acd15c25b53d7bcb5d319971b8e93fd7"}, + {file = 
"opentelemetry_instrumentation_asgi-0.43b0.tar.gz", hash = "sha256:3f6f19333dca31ef696672e4e36cb1c2613c71dc7e847c11ff36a37e1130dadc"}, +] + +[package.dependencies] +asgiref = ">=3.0,<4.0" +opentelemetry-api = ">=1.12,<2.0" +opentelemetry-instrumentation = "0.43b0" +opentelemetry-semantic-conventions = "0.43b0" +opentelemetry-util-http = "0.43b0" + +[package.extras] +instruments = ["asgiref (>=3.0,<4.0)"] +test = ["opentelemetry-instrumentation-asgi[instruments]", "opentelemetry-test-utils (==0.43b0)"] + +[[package]] +name = "opentelemetry-instrumentation-fastapi" +version = "0.43b0" +description = "OpenTelemetry FastAPI Instrumentation" +optional = true +python-versions = ">=3.7" +files = [ + {file = "opentelemetry_instrumentation_fastapi-0.43b0-py3-none-any.whl", hash = "sha256:b79c044df68a52e07b35fa12a424e7cc0dd27ff0a171c5fdcc41dea9de8fc938"}, + {file = "opentelemetry_instrumentation_fastapi-0.43b0.tar.gz", hash = "sha256:2afaaf470622e1a2732182c68f6d2431ffe5e026a7edacd0f83605632b66347f"}, +] + +[package.dependencies] +opentelemetry-api = ">=1.12,<2.0" +opentelemetry-instrumentation = "0.43b0" +opentelemetry-instrumentation-asgi = "0.43b0" +opentelemetry-semantic-conventions = "0.43b0" +opentelemetry-util-http = "0.43b0" + +[package.extras] +instruments = ["fastapi (>=0.58,<1.0)"] +test = ["httpx (>=0.22,<1.0)", "opentelemetry-instrumentation-fastapi[instruments]", "opentelemetry-test-utils (==0.43b0)", "requests (>=2.23,<3.0)"] + +[[package]] +name = "opentelemetry-proto" +version = "1.22.0" +description = "OpenTelemetry Python Proto" +optional = true +python-versions = ">=3.7" +files = [ + {file = "opentelemetry_proto-1.22.0-py3-none-any.whl", hash = "sha256:ce7188d22c75b6d0fe53e7fb58501613d0feade5139538e79dedd9420610fa0c"}, + {file = "opentelemetry_proto-1.22.0.tar.gz", hash = "sha256:9ec29169286029f17ca34ec1f3455802ffb90131642d2f545ece9a63e8f69003"}, +] + +[package.dependencies] +protobuf = ">=3.19,<5.0" + +[[package]] +name = "opentelemetry-sdk" +version = 
"1.22.0" +description = "OpenTelemetry Python SDK" +optional = true +python-versions = ">=3.7" +files = [ + {file = "opentelemetry_sdk-1.22.0-py3-none-any.whl", hash = "sha256:a730555713d7c8931657612a88a141e3a4fe6eb5523d9e2d5a8b1e673d76efa6"}, + {file = "opentelemetry_sdk-1.22.0.tar.gz", hash = "sha256:45267ac1f38a431fc2eb5d6e0c0d83afc0b78de57ac345488aa58c28c17991d0"}, +] + +[package.dependencies] +opentelemetry-api = "1.22.0" +opentelemetry-semantic-conventions = "0.43b0" +typing-extensions = ">=3.7.4" + +[[package]] +name = "opentelemetry-semantic-conventions" +version = "0.43b0" +description = "OpenTelemetry Semantic Conventions" +optional = true +python-versions = ">=3.7" +files = [ + {file = "opentelemetry_semantic_conventions-0.43b0-py3-none-any.whl", hash = "sha256:291284d7c1bf15fdaddf309b3bd6d3b7ce12a253cec6d27144439819a15d8445"}, + {file = "opentelemetry_semantic_conventions-0.43b0.tar.gz", hash = "sha256:b9576fb890df479626fa624e88dde42d3d60b8b6c8ae1152ad157a8b97358635"}, +] + +[[package]] +name = "opentelemetry-util-http" +version = "0.43b0" +description = "Web util for OpenTelemetry" +optional = true +python-versions = ">=3.7" +files = [ + {file = "opentelemetry_util_http-0.43b0-py3-none-any.whl", hash = "sha256:f25a820784b030f6cb86b3d76e5676c769b75ed3f55a210bcdae0a5e175ebadb"}, + {file = "opentelemetry_util_http-0.43b0.tar.gz", hash = "sha256:3ff6ab361dbe99fc81200d625603c0fb890c055c6e416a3e6d661ddf47a6c7f7"}, +] + [[package]] name = "orjson" version = "3.9.15" description = "Fast, correct Python JSON library supporting dataclasses, datetimes, and numpy" -category = "main" optional = true python-versions = ">=3.8" files = [ @@ -3546,7 +3999,6 @@ files = [ name = "oscrypto" version = "1.3.0" description = "TLS (SSL) sockets, key generation, encryption, decryption, signing, verification and KDFs using the OS crypto libraries. Does not require a compiler, and relies on the OS for patching. Works on Windows, OS X and Linux/BSD." 
-category = "main" optional = true python-versions = "*" files = [ @@ -3557,11 +4009,21 @@ files = [ [package.dependencies] asn1crypto = ">=1.5.1" +[[package]] +name = "overrides" +version = "7.6.0" +description = "A decorator to automatically detect mismatch when overriding a method." +optional = true +python-versions = ">=3.6" +files = [ + {file = "overrides-7.6.0-py3-none-any.whl", hash = "sha256:c36e6635519ea9c5b043b65c36d4b886aee8bd45b7d4681d2a6df0898df4b654"}, + {file = "overrides-7.6.0.tar.gz", hash = "sha256:01e15bbbf15b766f0675c275baa1878bd1c7dc9bc7b9ee13e677cdba93dc1bd9"}, +] + [[package]] name = "packaging" version = "23.2" description = "Core utilities for Python packages" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3573,7 +4035,6 @@ files = [ name = "pandas" version = "1.5.3" description = "Powerful data structures for data analysis, time series, and statistics" -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -3608,8 +4069,8 @@ files = [ [package.dependencies] numpy = [ + {version = ">=1.21.0", markers = "python_version >= \"3.10\" and python_version < \"3.11\""}, {version = ">=1.20.3", markers = "python_version < \"3.10\""}, - {version = ">=1.21.0", markers = "python_version >= \"3.10\""}, {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, ] python-dateutil = ">=2.8.1" @@ -3622,7 +4083,6 @@ test = ["hypothesis (>=5.5.3)", "pytest (>=6.0)", "pytest-xdist (>=1.31)"] name = "parso" version = "0.8.3" description = "A Python Parser" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -3638,7 +4098,6 @@ testing = ["docopt", "pytest (<6.0.0)"] name = "pathspec" version = "0.11.2" description = "Utility library for gitignore style pattern matching of file paths." 
-category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3650,7 +4109,6 @@ files = [ name = "patsy" version = "0.5.3" description = "A Python package for describing statistical models and for building design matrices." -category = "main" optional = true python-versions = "*" files = [ @@ -3669,7 +4127,6 @@ test = ["pytest", "pytest-cov", "scipy"] name = "peewee" version = "3.17.0" description = "a little orm" -category = "main" optional = true python-versions = "*" files = [ @@ -3680,7 +4137,6 @@ files = [ name = "pexpect" version = "4.8.0" description = "Pexpect allows easy control of interactive console applications." -category = "main" optional = false python-versions = "*" files = [ @@ -3695,7 +4151,6 @@ ptyprocess = ">=0.5" name = "pillow" version = "10.1.0" description = "Python Imaging Library (Fork)" -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -3763,7 +4218,6 @@ tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "pa name = "platformdirs" version = "3.11.0" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." 
-category = "main" optional = false python-versions = ">=3.7" files = [ @@ -3779,7 +4233,6 @@ test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4)", "pytest-co name = "plotly" version = "5.18.0" description = "An open-source, interactive data visualization library for Python" -category = "main" optional = true python-versions = ">=3.6" files = [ @@ -3795,7 +4248,6 @@ tenacity = ">=6.2.0" name = "pluggy" version = "1.3.0" description = "plugin and hook calling mechanisms for python" -category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -3811,7 +4263,6 @@ testing = ["pytest", "pytest-benchmark"] name = "polars" version = "0.18.15" description = "Blazingly fast DataFrame library" -category = "main" optional = true python-versions = ">=3.8" files = [ @@ -3840,11 +4291,33 @@ timezone = ["backports.zoneinfo", "tzdata"] xlsx2csv = ["xlsx2csv (>=0.8.0)"] xlsxwriter = ["xlsxwriter"] +[[package]] +name = "posthog" +version = "3.3.2" +description = "Integrate PostHog into any python application." +optional = true +python-versions = "*" +files = [ + {file = "posthog-3.3.2-py2.py3-none-any.whl", hash = "sha256:14fb43ea95c40b353db59c49af2c09ff15188aa2963f48091fc7912fa9375263"}, + {file = "posthog-3.3.2.tar.gz", hash = "sha256:734bf89f3c372605a8bbf2b07f600885287209145d747b09ccd004c59834750e"}, +] + +[package.dependencies] +backoff = ">=1.10.0" +monotonic = ">=1.5" +python-dateutil = ">2.1" +requests = ">=2.7,<3.0" +six = ">=1.5" + +[package.extras] +dev = ["black", "flake8", "flake8-print", "isort", "pre-commit"] +sentry = ["django", "sentry-sdk"] +test = ["coverage", "flake8", "freezegun (==0.3.15)", "mock (>=2.0.0)", "pylint", "pytest", "pytest-timeout"] + [[package]] name = "pre-commit" version = "3.5.0" description = "A framework for managing and maintaining multi-language pre-commit hooks." 
-category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -3863,7 +4336,6 @@ virtualenv = ">=20.10.0" name = "prometheus-client" version = "0.19.0" description = "Python client for the Prometheus monitoring system." -category = "main" optional = true python-versions = ">=3.8" files = [ @@ -3878,7 +4350,6 @@ twisted = ["twisted"] name = "prompt-toolkit" version = "3.0.39" description = "Library for building powerful interactive command lines in Python" -category = "main" optional = false python-versions = ">=3.7.0" files = [ @@ -3893,7 +4364,6 @@ wcwidth = "*" name = "proto-plus" version = "1.22.3" description = "Beautiful, Pythonic protocol buffers." -category = "main" optional = true python-versions = ">=3.6" files = [ @@ -3911,7 +4381,6 @@ testing = ["google-api-core[grpc] (>=1.31.5)"] name = "protobuf" version = "4.24.4" description = "" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -3934,7 +4403,6 @@ files = [ name = "psutil" version = "5.9.8" description = "Cross-platform lib for process and system monitoring in Python." 
-category = "main" optional = true python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ @@ -3963,7 +4431,6 @@ test = ["enum34", "ipaddress", "mock", "pywin32", "wmi"] name = "psycopg2" version = "2.9.9" description = "psycopg2 - Python-PostgreSQL Database Adapter" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -3971,8 +4438,6 @@ files = [ {file = "psycopg2-2.9.9-cp310-cp310-win_amd64.whl", hash = "sha256:426f9f29bde126913a20a96ff8ce7d73fd8a216cfb323b1f04da402d452853c3"}, {file = "psycopg2-2.9.9-cp311-cp311-win32.whl", hash = "sha256:ade01303ccf7ae12c356a5e10911c9e1c51136003a9a1d92f7aa9d010fb98372"}, {file = "psycopg2-2.9.9-cp311-cp311-win_amd64.whl", hash = "sha256:121081ea2e76729acfb0673ff33755e8703d45e926e416cb59bae3a86c6a4981"}, - {file = "psycopg2-2.9.9-cp312-cp312-win32.whl", hash = "sha256:d735786acc7dd25815e89cc4ad529a43af779db2e25aa7c626de864127e5a024"}, - {file = "psycopg2-2.9.9-cp312-cp312-win_amd64.whl", hash = "sha256:a7653d00b732afb6fc597e29c50ad28087dcb4fbfb28e86092277a559ae4e693"}, {file = "psycopg2-2.9.9-cp37-cp37m-win32.whl", hash = "sha256:5e0d98cade4f0e0304d7d6f25bbfbc5bd186e07b38eac65379309c4ca3193efa"}, {file = "psycopg2-2.9.9-cp37-cp37m-win_amd64.whl", hash = "sha256:7e2dacf8b009a1c1e843b5213a87f7c544b2b042476ed7755be813eaf4e8347a"}, {file = "psycopg2-2.9.9-cp38-cp38-win32.whl", hash = "sha256:ff432630e510709564c01dafdbe996cb552e0b9f3f065eb89bdce5bd31fabf4c"}, @@ -3986,7 +4451,6 @@ files = [ name = "ptyprocess" version = "0.7.0" description = "Run a subprocess in a pseudo terminal" -category = "main" optional = false python-versions = "*" files = [ @@ -3994,11 +4458,57 @@ files = [ {file = "ptyprocess-0.7.0.tar.gz", hash = "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220"}, ] +[[package]] +name = "pulsar-client" +version = "3.4.0" +description = "Apache Pulsar Python client library" +optional = true +python-versions = "*" +files = [ + {file = 
"pulsar_client-3.4.0-cp310-cp310-macosx_10_15_universal2.whl", hash = "sha256:ebf99db5244ff69479283b25621b070492acc4bb643d162d86b90387cb6fdb2a"}, + {file = "pulsar_client-3.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d6cb5d8e1482a8aea758633be23717e0c4bb7dc53784e37915c0048c0382f134"}, + {file = "pulsar_client-3.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b30a7592e42c76034e9a8d64d42dd5bab361425f869de562e9ccad698e19cd88"}, + {file = "pulsar_client-3.4.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d5963090a78a5644ba25f41da3a6d49ea3f00c972b095baff365916dc246426a"}, + {file = "pulsar_client-3.4.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:419cdcf577f755e3f31bf264300d9ba158325edb2ee9cee555d81ba1909c094e"}, + {file = "pulsar_client-3.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:4c93c35ee97307dae153e748b33dcd3d4f06da34bca373321aa2df73f1535705"}, + {file = "pulsar_client-3.4.0-cp311-cp311-macosx_10_15_universal2.whl", hash = "sha256:11952fb022ee72debf53b169f4482f9dc5c890be0149ae98779864b3a21f1bd3"}, + {file = "pulsar_client-3.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f8743c320aa96798d20cafa98ea97a68c4295fc4872c23acd5e012fd36cb06ba"}, + {file = "pulsar_client-3.4.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:33571de99cd898349f17978ba62e2b839ea0275fb7067f31bf5f6ebfeae0987d"}, + {file = "pulsar_client-3.4.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a60c03c3e70f018538e7cd3fa84d95e283b610272b744166dbc48960a809fa07"}, + {file = "pulsar_client-3.4.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:4c47041267b5843ffec54352d842156c279945f3e976d7025ffa89875ff76390"}, + {file = "pulsar_client-3.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:49fe4ab04004b476c87ab3ad22fe87346fca564a3e3ca9c0ac58fee45a895d81"}, + {file = "pulsar_client-3.4.0-cp312-cp312-macosx_10_15_universal2.whl", hash = 
"sha256:1e077a4839be3ead3de3f05b4c244269dca2df07f47cea0b90544c7e9dc1642f"}, + {file = "pulsar_client-3.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f202b84e1f683d64672dd1971114600ae2e5c3735587286ff9bfb431385f08e8"}, + {file = "pulsar_client-3.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c606c04f357341042fa6c75477de7d2204f7ae50aa29c2f74b24e54c85f47f96"}, + {file = "pulsar_client-3.4.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c67b25ede3a578f5a7dc30230e52609ef38191f74b47e5cbdbc98c42df556927"}, + {file = "pulsar_client-3.4.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:b7f8211cc9460cdf4d06e4e1cb878689d2aa4a7e4027bd2a2f1419a79ade16a6"}, + {file = "pulsar_client-3.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:c5399e9780d6951c69808c0b6175311a966af82fb08addf6e741ae37b1bee7ef"}, + {file = "pulsar_client-3.4.0-cp38-cp38-macosx_10_15_universal2.whl", hash = "sha256:a2d6c850b60106dc915d3476a490fba547c6748a5f742b68abd30d1a35355b82"}, + {file = "pulsar_client-3.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a52ea8294a9f30eb6f0a2db5dc16e3aad7ff2284f818c48ad3a6b601723be02b"}, + {file = "pulsar_client-3.4.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1eeeede40108be12222e009285c971e5b8f6433d9f0f8ef934d6a131585921c4"}, + {file = "pulsar_client-3.4.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:9409066c600f2b6f220552c5dfe08aeeabcf07fe0e76367aa5816b2e87a5cf72"}, + {file = "pulsar_client-3.4.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:58e2f886e6dab43e66c3ce990fe96209e55ab46350506829a637b77b74125fb9"}, + {file = "pulsar_client-3.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:b57dfa5063b0d9dc7664896c55605eac90753e35e80db5a959d3be2be0ab0d48"}, + {file = "pulsar_client-3.4.0-cp39-cp39-macosx_10_15_universal2.whl", hash = "sha256:7704c664aa2c801af4c2d3a58e9d8ffaeef12ce8a0f71712e9187f9a96da856f"}, + {file = 
"pulsar_client-3.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f0364db563e27442053bdbb8655e7ffb420f491690bc2c78da5a58bd35c658ad"}, + {file = "pulsar_client-3.4.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e3e34de19e0744d8aa3538cb2172076bccd0761b3e94ebadb7bd59765ae3d1ed"}, + {file = "pulsar_client-3.4.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:dc8be41dec8cb052fb1837550f495e9b73a8b3cf85e07157904ec84832758a65"}, + {file = "pulsar_client-3.4.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:b49d669bed15b7edb9c936704310d57808f1d01c511b94d866f54fe8ffe1752d"}, + {file = "pulsar_client-3.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:88c93e5fbfc349f3967e931f7a908d15fd4fd725ebdd842423ac9cd961fe293f"}, +] + +[package.dependencies] +certifi = "*" + +[package.extras] +all = ["apache-bookkeeper-client (>=4.16.1)", "fastavro (>=1.9.2)", "grpcio (>=1.60.0)", "prometheus-client", "protobuf (>=3.6.1,<=3.20.3)", "ratelimit"] +avro = ["fastavro (>=1.9.2)"] +functions = ["apache-bookkeeper-client (>=4.16.1)", "grpcio (>=1.60.0)", "prometheus-client", "protobuf (>=3.6.1,<=3.20.3)", "ratelimit"] + [[package]] name = "pure-eval" version = "0.2.2" description = "Safely evaluate AST nodes without side effects" -category = "main" optional = false python-versions = "*" files = [ @@ -4013,7 +4523,6 @@ tests = ["pytest"] name = "py-spy" version = "0.3.14" description = "Sampling profiler for Python programs" -category = "main" optional = true python-versions = "*" files = [ @@ -4030,7 +4539,6 @@ files = [ name = "pyarrow" version = "13.0.0" description = "Python library for Apache Arrow" -category = "main" optional = true python-versions = ">=3.8" files = [ @@ -4072,7 +4580,6 @@ numpy = ">=1.16.6" name = "pyasn1" version = "0.5.0" description = "Pure-Python implementation of ASN.1 types and DER/BER/CER codecs (X.208)" -category = "main" optional = true python-versions = 
"!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" files = [ @@ -4084,7 +4591,6 @@ files = [ name = "pyasn1-modules" version = "0.3.0" description = "A collection of ASN.1-based protocols modules" -category = "main" optional = true python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" files = [ @@ -4099,7 +4605,6 @@ pyasn1 = ">=0.4.6,<0.6.0" name = "pycparser" version = "2.21" description = "C parser in Python" -category = "main" optional = true python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -4111,7 +4616,6 @@ files = [ name = "pycryptodomex" version = "3.19.0" description = "Cryptographic library for Python" -category = "main" optional = true python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -4153,7 +4657,6 @@ files = [ name = "pydantic" version = "2.6.2" description = "Data validation using Python type hints" -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -4173,7 +4676,6 @@ email = ["email-validator (>=2.0.0)"] name = "pydantic-core" version = "2.16.3" description = "" -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -4265,7 +4767,6 @@ typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" name = "pydeck" version = "0.8.0" description = "Widget for deck.gl maps" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -4285,7 +4786,6 @@ jupyter = ["ipykernel (>=5.1.2)", "ipython (>=5.8.0)", "ipywidgets (>=7,<8)", "t name = "pygments" version = "2.16.1" description = "Pygments is a syntax highlighting package written in Python." 
-category = "main" optional = false python-versions = ">=3.7" files = [ @@ -4300,7 +4800,6 @@ plugins = ["importlib-metadata"] name = "pyhive" version = "0.7.0" description = "Python interface to Hive" -category = "main" optional = true python-versions = "*" files = [ @@ -4323,7 +4822,6 @@ trino = ["requests (>=1.0.0)"] name = "pyjwt" version = "2.8.0" description = "JSON Web Token implementation in Python" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -4341,7 +4839,6 @@ tests = ["coverage[toml] (==5.0.4)", "pytest (>=6.0.0,<7.0.0)"] name = "pymdown-extensions" version = "10.3.1" description = "Extension pack for Python Markdown." -category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -4360,7 +4857,6 @@ extra = ["pygments (>=2.12)"] name = "pymysql" version = "1.1.0" description = "Pure Python MySQL Driver" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -4376,7 +4872,6 @@ rsa = ["cryptography"] name = "pyopenssl" version = "23.3.0" description = "Python wrapper module around the OpenSSL library" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -4395,7 +4890,6 @@ test = ["flaky", "pretend", "pytest (>=3.0.1)"] name = "pyparsing" version = "3.1.1" description = "pyparsing module - Classes and methods to define and execute parsing grammars" -category = "main" optional = false python-versions = ">=3.6.8" files = [ @@ -4406,11 +4900,45 @@ files = [ [package.extras] diagrams = ["jinja2", "railroad-diagrams"] +[[package]] +name = "pypika" +version = "0.48.9" +description = "A SQL query builder API for Python" +optional = true +python-versions = "*" +files = [ + {file = "PyPika-0.48.9.tar.gz", hash = "sha256:838836a61747e7c8380cd1b7ff638694b7a7335345d0f559b04b2cd832ad5378"}, +] + +[[package]] +name = "pyproject-hooks" +version = "1.0.0" +description = "Wrappers to call pyproject.toml-based build backend hooks." 
+optional = true +python-versions = ">=3.7" +files = [ + {file = "pyproject_hooks-1.0.0-py3-none-any.whl", hash = "sha256:283c11acd6b928d2f6a7c73fa0d01cb2bdc5f07c57a2eeb6e83d5e56b97976f8"}, + {file = "pyproject_hooks-1.0.0.tar.gz", hash = "sha256:f271b298b97f5955d53fb12b72c1fb1948c22c1a6b70b315c54cedaca0264ef5"}, +] + +[package.dependencies] +tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} + +[[package]] +name = "pyreadline3" +version = "3.4.1" +description = "A python implementation of GNU readline." +optional = true +python-versions = "*" +files = [ + {file = "pyreadline3-3.4.1-py3-none-any.whl", hash = "sha256:b0efb6516fd4fb07b45949053826a62fa4cb353db5be2bbb4a7aa1fdd1e345fb"}, + {file = "pyreadline3-3.4.1.tar.gz", hash = "sha256:6f3d1f7b8a31ba32b73917cefc1f28cc660562f39aea8646d30bd6eff21f7bae"}, +] + [[package]] name = "pytest" version = "7.4.3" description = "pytest: simple powerful testing with Python" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -4433,7 +4961,6 @@ testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "no name = "pytest-env" version = "0.8.2" description = "py.test plugin that allows you to add environment variables." 
-category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -4451,7 +4978,6 @@ test = ["coverage (>=7.2.7)", "pytest-mock (>=3.10)"] name = "pytest-mock" version = "3.12.0" description = "Thin-wrapper around the mock package for easier use with pytest" -category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -4469,7 +4995,6 @@ dev = ["pre-commit", "pytest-asyncio", "tox"] name = "python-dateutil" version = "2.8.2" description = "Extensions to the standard Python datetime module" -category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" files = [ @@ -4484,7 +5009,6 @@ six = ">=1.5" name = "python-dotenv" version = "1.0.0" description = "Read key-value pairs from a .env file and set them as environment variables" -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -4499,7 +5023,6 @@ cli = ["click (>=5.0)"] name = "pytz" version = "2023.3.post1" description = "World timezone definitions, modern and historical" -category = "main" optional = false python-versions = "*" files = [ @@ -4511,7 +5034,6 @@ files = [ name = "pyyaml" version = "6.0.1" description = "YAML parser and emitter for Python" -category = "main" optional = false python-versions = ">=3.6" files = [ @@ -4561,7 +5083,6 @@ files = [ name = "pyyaml-env-tag" version = "0.1" description = "A custom YAML tag for referencing environment variables in YAML files. " -category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -4576,7 +5097,6 @@ pyyaml = "*" name = "ray" version = "2.9.1" description = "Ray provides a simple, universal API for building distributed applications." 
-category = "main" optional = true python-versions = ">=3.8" files = [ @@ -4612,8 +5132,8 @@ filelock = "*" frozenlist = "*" gpustat = {version = ">=1.0.0", optional = true, markers = "extra == \"default\""} grpcio = [ - {version = ">=1.32.0", optional = true, markers = "python_version < \"3.10\" and extra == \"default\""}, {version = ">=1.42.0", optional = true, markers = "python_version >= \"3.10\" and extra == \"default\""}, + {version = ">=1.32.0", optional = true, markers = "python_version < \"3.10\" and extra == \"default\""}, ] jsonschema = "*" msgpack = ">=1.0.0,<2.0.0" @@ -4622,23 +5142,23 @@ packaging = "*" prometheus-client = {version = ">=0.7.1", optional = true, markers = "extra == \"default\""} protobuf = ">=3.15.3,<3.19.5 || >3.19.5" py-spy = {version = ">=0.2.0", optional = true, markers = "extra == \"default\""} -pydantic = {version = "<2.0.0 || >=2.5.0,<3", optional = true, markers = "extra == \"default\""} +pydantic = {version = "<2.0.dev0 || >=2.5.dev0,<3", optional = true, markers = "extra == \"default\""} pyyaml = "*" requests = "*" smart-open = {version = "*", optional = true, markers = "extra == \"default\""} virtualenv = {version = ">=20.0.24,<20.21.1 || >20.21.1", optional = true, markers = "extra == \"default\""} [package.extras] -air = ["aiohttp (>=3.7)", "aiohttp-cors", "aiorwlock", "colorful", "fastapi", "fsspec", "gpustat (>=1.0.0)", "grpcio (>=1.32.0)", "grpcio (>=1.42.0)", "numpy (>=1.20)", "opencensus", "pandas", "pandas (>=1.3)", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pyarrow (>=6.0.1)", "pydantic (<2.0.0 || >=2.5.0,<3)", "requests", "smart-open", "starlette", "tensorboardX (>=1.9)", "uvicorn[standard]", "virtualenv (>=20.0.24,!=20.21.1)", "watchfiles"] -all = ["aiohttp (>=3.7)", "aiohttp-cors", "aiorwlock", "colorful", "dm-tree", "fastapi", "fsspec", "gpustat (>=1.0.0)", "grpcio (!=1.56.0)", "grpcio (>=1.32.0)", "grpcio (>=1.42.0)", "gymnasium (==0.28.1)", "lz4", "numpy (>=1.20)", "opencensus", "opentelemetry-api", 
"opentelemetry-exporter-otlp", "opentelemetry-sdk", "pandas", "pandas (>=1.3)", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pyarrow (>=6.0.1)", "pydantic (<2.0.0 || >=2.5.0,<3)", "pyyaml", "ray-cpp (==2.9.1)", "requests", "rich", "scikit-image", "scipy", "smart-open", "starlette", "tensorboardX (>=1.9)", "typer", "uvicorn[standard]", "virtualenv (>=20.0.24,!=20.21.1)", "watchfiles"] +air = ["aiohttp (>=3.7)", "aiohttp-cors", "aiorwlock", "colorful", "fastapi", "fsspec", "gpustat (>=1.0.0)", "grpcio (>=1.32.0)", "grpcio (>=1.42.0)", "numpy (>=1.20)", "opencensus", "pandas", "pandas (>=1.3)", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pyarrow (>=6.0.1)", "pydantic (<2.0.dev0 || >=2.5.dev0,<3)", "requests", "smart-open", "starlette", "tensorboardX (>=1.9)", "uvicorn[standard]", "virtualenv (>=20.0.24,!=20.21.1)", "watchfiles"] +all = ["aiohttp (>=3.7)", "aiohttp-cors", "aiorwlock", "colorful", "dm-tree", "fastapi", "fsspec", "gpustat (>=1.0.0)", "grpcio (!=1.56.0)", "grpcio (>=1.32.0)", "grpcio (>=1.42.0)", "gymnasium (==0.28.1)", "lz4", "numpy (>=1.20)", "opencensus", "opentelemetry-api", "opentelemetry-exporter-otlp", "opentelemetry-sdk", "pandas", "pandas (>=1.3)", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pyarrow (>=6.0.1)", "pydantic (<2.0.dev0 || >=2.5.dev0,<3)", "pyyaml", "ray-cpp (==2.9.1)", "requests", "rich", "scikit-image", "scipy", "smart-open", "starlette", "tensorboardX (>=1.9)", "typer", "uvicorn[standard]", "virtualenv (>=20.0.24,!=20.21.1)", "watchfiles"] client = ["grpcio (!=1.56.0)"] cpp = ["ray-cpp (==2.9.1)"] data = ["fsspec", "numpy (>=1.20)", "pandas (>=1.3)", "pyarrow (>=6.0.1)"] -default = ["aiohttp (>=3.7)", "aiohttp-cors", "colorful", "gpustat (>=1.0.0)", "grpcio (>=1.32.0)", "grpcio (>=1.42.0)", "opencensus", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pydantic (<2.0.0 || >=2.5.0,<3)", "requests", "smart-open", "virtualenv (>=20.0.24,!=20.21.1)"] +default = ["aiohttp (>=3.7)", "aiohttp-cors", "colorful", 
"gpustat (>=1.0.0)", "grpcio (>=1.32.0)", "grpcio (>=1.42.0)", "opencensus", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pydantic (<2.0.dev0 || >=2.5.dev0,<3)", "requests", "smart-open", "virtualenv (>=20.0.24,!=20.21.1)"] observability = ["opentelemetry-api", "opentelemetry-exporter-otlp", "opentelemetry-sdk"] rllib = ["dm-tree", "fsspec", "gymnasium (==0.28.1)", "lz4", "pandas", "pyarrow (>=6.0.1)", "pyyaml", "requests", "rich", "scikit-image", "scipy", "tensorboardX (>=1.9)", "typer"] -serve = ["aiohttp (>=3.7)", "aiohttp-cors", "aiorwlock", "colorful", "fastapi", "gpustat (>=1.0.0)", "grpcio (>=1.32.0)", "grpcio (>=1.42.0)", "opencensus", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pydantic (<2.0.0 || >=2.5.0,<3)", "requests", "smart-open", "starlette", "uvicorn[standard]", "virtualenv (>=20.0.24,!=20.21.1)", "watchfiles"] -serve-grpc = ["aiohttp (>=3.7)", "aiohttp-cors", "aiorwlock", "colorful", "fastapi", "gpustat (>=1.0.0)", "grpcio (>=1.32.0)", "grpcio (>=1.42.0)", "opencensus", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pydantic (<2.0.0 || >=2.5.0,<3)", "requests", "smart-open", "starlette", "uvicorn[standard]", "virtualenv (>=20.0.24,!=20.21.1)", "watchfiles"] +serve = ["aiohttp (>=3.7)", "aiohttp-cors", "aiorwlock", "colorful", "fastapi", "gpustat (>=1.0.0)", "grpcio (>=1.32.0)", "grpcio (>=1.42.0)", "opencensus", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pydantic (<2.0.dev0 || >=2.5.dev0,<3)", "requests", "smart-open", "starlette", "uvicorn[standard]", "virtualenv (>=20.0.24,!=20.21.1)", "watchfiles"] +serve-grpc = ["aiohttp (>=3.7)", "aiohttp-cors", "aiorwlock", "colorful", "fastapi", "gpustat (>=1.0.0)", "grpcio (>=1.32.0)", "grpcio (>=1.42.0)", "opencensus", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pydantic (<2.0.dev0 || >=2.5.dev0,<3)", "requests", "smart-open", "starlette", "uvicorn[standard]", "virtualenv (>=20.0.24,!=20.21.1)", "watchfiles"] train = ["fsspec", "pandas", "pyarrow (>=6.0.1)", 
"requests", "tensorboardX (>=1.9)"] tune = ["fsspec", "pandas", "pyarrow (>=6.0.1)", "requests", "tensorboardX (>=1.9)"] @@ -4646,7 +5166,6 @@ tune = ["fsspec", "pandas", "pyarrow (>=6.0.1)", "requests", "tensorboardX (>=1. name = "referencing" version = "0.30.2" description = "JSON Referencing + Python" -category = "main" optional = true python-versions = ">=3.8" files = [ @@ -4662,7 +5181,6 @@ rpds-py = ">=0.7.0" name = "requests" version = "2.31.0" description = "Python HTTP for Humans." -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -4680,11 +5198,28 @@ urllib3 = ">=1.21.1,<3" socks = ["PySocks (>=1.5.6,!=1.5.7)"] use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] +[[package]] +name = "requests-oauthlib" +version = "1.3.1" +description = "OAuthlib authentication support for Requests." +optional = true +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "requests-oauthlib-1.3.1.tar.gz", hash = "sha256:75beac4a47881eeb94d5ea5d6ad31ef88856affe2332b9aafb52c6452ccf0d7a"}, + {file = "requests_oauthlib-1.3.1-py2.py3-none-any.whl", hash = "sha256:2577c501a2fb8d05a304c09d090d6e47c306fef15809d102b327cf8364bddab5"}, +] + +[package.dependencies] +oauthlib = ">=3.0.0" +requests = ">=2.0.0" + +[package.extras] +rsa = ["oauthlib[signedtoken] (>=3.0.0)"] + [[package]] name = "rich" version = "13.6.0" description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" -category = "main" optional = true python-versions = ">=3.7.0" files = [ @@ -4703,7 +5238,6 @@ jupyter = ["ipywidgets (>=7.5.1,<9)"] name = "rpds-py" version = "0.10.6" description = "Python bindings to Rust's persistent data structures (rpds)" -category = "main" optional = true python-versions = ">=3.8" files = [ @@ -4812,7 +5346,6 @@ files = [ name = "rsa" version = "4.9" description = "Pure-Python RSA implementation" -category = "main" optional = true python-versions = ">=3.6,<4" files = [ @@ -4827,7 +5360,6 @@ 
pyasn1 = ">=0.1.3" name = "ruff" version = "0.1.3" description = "An extremely fast Python linter, written in Rust." -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -4854,7 +5386,6 @@ files = [ name = "scikit-learn" version = "1.3.2" description = "A set of python modules for machine learning and data mining" -category = "main" optional = true python-versions = ">=3.8" files = [ @@ -4902,7 +5433,6 @@ tests = ["black (>=23.3.0)", "matplotlib (>=3.1.3)", "mypy (>=1.3)", "numpydoc ( name = "scipy" version = "1.9.3" description = "Fundamental algorithms for scientific computing in Python" -category = "main" optional = true python-versions = ">=3.8" files = [ @@ -4941,7 +5471,6 @@ test = ["asv", "gmpy2", "mpmath", "pytest", "pytest-cov", "pytest-xdist", "sciki name = "seaborn" version = "0.12.2" description = "Statistical data visualization" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -4963,7 +5492,6 @@ stats = ["scipy (>=1.3)", "statsmodels (>=0.10)"] name = "setuptools" version = "68.2.2" description = "Easily download, build, install, upgrade, and uninstall Python packages" -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -4980,7 +5508,6 @@ testing-integration = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "jar name = "shapely" version = "2.0.2" description = "Manipulation and analysis of geometric objects" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -5031,14 +5558,13 @@ files = [ numpy = ">=1.14" [package.extras] -docs = ["matplotlib", "numpydoc (>=1.1.0,<1.2.0)", "sphinx", "sphinx-book-theme", "sphinx-remove-toctrees"] +docs = ["matplotlib", "numpydoc (==1.1.*)", "sphinx", "sphinx-book-theme", "sphinx-remove-toctrees"] test = ["pytest", "pytest-cov"] [[package]] name = "six" version = "1.16.0" description = "Python 2 and 3 compatibility utilities" -category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" files = [ 
@@ -5050,7 +5576,6 @@ files = [ name = "smart-open" version = "6.4.0" description = "Utils for streaming large files (S3, HDFS, GCS, Azure Blob Storage, gzip, bz2...)" -category = "main" optional = true python-versions = ">=3.6,<4.0" files = [ @@ -5072,7 +5597,6 @@ webhdfs = ["requests"] name = "smmap" version = "5.0.1" description = "A pure Python implementation of a sliding window memory map manager" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -5084,7 +5608,6 @@ files = [ name = "sniffio" version = "1.3.0" description = "Sniff out which async library your code is running under" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -5096,7 +5619,6 @@ files = [ name = "snowflake-connector-python" version = "3.3.1" description = "Snowflake Connector for Python" -category = "main" optional = true python-versions = ">=3.8" files = [ @@ -5153,7 +5675,6 @@ secure-local-storage = ["keyring (!=16.1.0,<25.0.0)"] name = "snowflake-sqlalchemy" version = "1.5.0" description = "Snowflake SQLAlchemy Dialect" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -5173,7 +5694,6 @@ pandas = ["snowflake-connector-python[pandas] (<4.0.0)"] name = "sortedcontainers" version = "2.4.0" description = "Sorted Containers -- Sorted List, Sorted Dict, Sorted Set" -category = "main" optional = true python-versions = "*" files = [ @@ -5185,7 +5705,6 @@ files = [ name = "soupsieve" version = "2.5" description = "A modern CSS selector implementation for Beautiful Soup." 
-category = "main" optional = true python-versions = ">=3.8" files = [ @@ -5197,7 +5716,6 @@ files = [ name = "sourcery" version = "1.12.0" description = "Magically refactor Python" -category = "dev" optional = false python-versions = "*" files = [ @@ -5210,7 +5728,6 @@ files = [ name = "sqlalchemy" version = "1.4.50" description = "Database Abstraction Library" -category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" files = [ @@ -5242,7 +5759,7 @@ files = [ ] [package.dependencies] -greenlet = {version = "!=0.4.17", markers = "python_version >= \"3\" and platform_machine == \"aarch64\" or python_version >= \"3\" and platform_machine == \"ppc64le\" or python_version >= \"3\" and platform_machine == \"x86_64\" or python_version >= \"3\" and platform_machine == \"amd64\" or python_version >= \"3\" and platform_machine == \"AMD64\" or python_version >= \"3\" and platform_machine == \"win32\" or python_version >= \"3\" and platform_machine == \"WIN32\""} +greenlet = {version = "!=0.4.17", markers = "python_version >= \"3\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\")"} [package.extras] aiomysql = ["aiomysql (>=0.2.0)", "greenlet (!=0.4.17)"] @@ -5265,11 +5782,35 @@ postgresql-psycopg2cffi = ["psycopg2cffi"] pymysql = ["pymysql", "pymysql (<1)"] sqlcipher = ["sqlcipher3-binary"] +[[package]] +name = "sqlalchemy-bigquery" +version = "1.9.0" +description = "SQLAlchemy dialect for BigQuery" +optional = true +python-versions = ">=3.8, <3.13" +files = [ + {file = "sqlalchemy-bigquery-1.9.0.tar.gz", hash = "sha256:549b250ad4c75fe9efaff4ee32e08deb488f1886affbb159d8c149b6b537524f"}, + {file = "sqlalchemy_bigquery-1.9.0-py2.py3-none-any.whl", hash = "sha256:b1a4c2f5b672ca7bb02e1357d6f3aeabbb19a67e986f2ccd2654fb005705e98e"}, +] + 
+[package.dependencies] +google-api-core = ">=1.31.5,<2.0.dev0 || >2.3.0,<3.0.0dev" +google-auth = ">=1.25.0,<3.0.0dev" +google-cloud-bigquery = ">=2.25.2,<4.0.0dev" +packaging = "*" +sqlalchemy = ">=1.2.0,<2.0.0dev" + +[package.extras] +alembic = ["alembic"] +all = ["GeoAlchemy2", "alembic", "google-cloud-bigquery-storage (>=2.0.0,<3.0.0dev)", "grpcio (>=1.47.0,<2.0dev)", "grpcio (>=1.49.1,<2.0dev)", "packaging", "pyarrow (>=3.0.0)", "pytz", "shapely"] +bqstorage = ["google-cloud-bigquery-storage (>=2.0.0,<3.0.0dev)", "grpcio (>=1.47.0,<2.0dev)", "grpcio (>=1.49.1,<2.0dev)", "pyarrow (>=3.0.0)"] +geography = ["GeoAlchemy2", "shapely"] +tests = ["packaging", "pytz"] + [[package]] name = "sqlalchemy-databricks" version = "0.2.0" description = "SQLAlchemy Dialect for Databricks" -category = "main" optional = true python-versions = ">=3.8,<4.0" files = [ @@ -5286,7 +5827,6 @@ SQLAlchemy = ">=1,<2" name = "stack-data" version = "0.6.3" description = "Extract data from python stack frames and tracebacks for informative displays" -category = "main" optional = false python-versions = "*" files = [ @@ -5302,11 +5842,28 @@ pure-eval = "*" [package.extras] tests = ["cython", "littleutils", "pygments", "pytest", "typeguard"] +[[package]] +name = "starlette" +version = "0.35.1" +description = "The little ASGI library that shines." 
+optional = true +python-versions = ">=3.8" +files = [ + {file = "starlette-0.35.1-py3-none-any.whl", hash = "sha256:50bbbda9baa098e361f398fda0928062abbaf1f54f4fadcbe17c092a01eb9a25"}, + {file = "starlette-0.35.1.tar.gz", hash = "sha256:3e2639dac3520e4f58734ed22553f950d3f3cb1001cd2eaac4d57e8cdc5f66bc"}, +] + +[package.dependencies] +anyio = ">=3.4.0,<5" +typing-extensions = {version = ">=3.10.0", markers = "python_version < \"3.10\""} + +[package.extras] +full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart", "pyyaml"] + [[package]] name = "statsmodels" version = "0.14.0" description = "Statistical computations and models for Python" -category = "main" optional = true python-versions = ">=3.8" files = [ @@ -5335,8 +5892,8 @@ files = [ [package.dependencies] numpy = [ - {version = ">=1.18", markers = "python_version != \"3.10\" or platform_system != \"Windows\" or platform_python_implementation == \"PyPy\""}, {version = ">=1.22.3", markers = "python_version == \"3.10\" and platform_system == \"Windows\" and platform_python_implementation != \"PyPy\""}, + {version = ">=1.18", markers = "python_version != \"3.10\" or platform_system != \"Windows\" or platform_python_implementation == \"PyPy\""}, ] packaging = ">=21.3" pandas = ">=1.0" @@ -5352,7 +5909,6 @@ docs = ["ipykernel", "jupyter-client", "matplotlib", "nbconvert", "nbformat", "n name = "streamlit" version = "1.28.0" description = "A faster way to build and share data apps" -category = "main" optional = true python-versions = ">=3.8, !=3.9.7" files = [ @@ -5388,11 +5944,24 @@ watchdog = {version = ">=2.1.5", markers = "platform_system != \"Darwin\""} [package.extras] snowflake = ["snowflake-connector-python (>=2.8.0)", "snowflake-snowpark-python (>=0.9.0)"] +[[package]] +name = "sympy" +version = "1.12" +description = "Computer algebra system (CAS) in Python" +optional = true +python-versions = ">=3.8" +files = [ + {file = "sympy-1.12-py3-none-any.whl", hash = 
"sha256:c3588cd4295d0c0f603d0f2ae780587e64e2efeedb3521e46b9bb1d08d184fa5"}, + {file = "sympy-1.12.tar.gz", hash = "sha256:ebf595c8dac3e0fdc4152c51878b498396ec7f30e7a914d6071e674d49420fb8"}, +] + +[package.dependencies] +mpmath = ">=0.19" + [[package]] name = "tenacity" version = "8.2.3" description = "Retry code until it succeeds" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -5407,7 +5976,6 @@ doc = ["reno", "sphinx", "tornado (>=4.5)"] name = "text-generation" version = "0.6.1" description = "Hugging Face Text Generation Python Client" -category = "main" optional = true python-versions = ">=3.7,<4.0" files = [ @@ -5424,7 +5992,6 @@ pydantic = ">1.10,<3" name = "threadpoolctl" version = "3.2.0" description = "threadpoolctl" -category = "main" optional = true python-versions = ">=3.8" files = [ @@ -5436,7 +6003,6 @@ files = [ name = "thrift" version = "0.16.0" description = "Python bindings for the Apache Thrift RPC system" -category = "main" optional = true python-versions = "*" files = [ @@ -5451,11 +6017,137 @@ all = ["tornado (>=4.0)", "twisted"] tornado = ["tornado (>=4.0)"] twisted = ["twisted"] +[[package]] +name = "tokenizers" +version = "0.15.1" +description = "" +optional = true +python-versions = ">=3.7" +files = [ + {file = "tokenizers-0.15.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:32c9491dd1bcb33172c26b454dbd607276af959b9e78fa766e2694cafab3103c"}, + {file = "tokenizers-0.15.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29a1b784b870a097e7768f8c20c2dd851e2c75dad3efdae69a79d3e7f1d614d5"}, + {file = "tokenizers-0.15.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:0049fbe648af04148b08cb211994ce8365ee628ce49724b56aaefd09a3007a78"}, + {file = "tokenizers-0.15.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e84b3c235219e75e24de6b71e6073cd2c8d740b14d88e4c6d131b90134e3a338"}, + {file = 
"tokenizers-0.15.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8cc575769ea11d074308c6d71cb10b036cdaec941562c07fc7431d956c502f0e"}, + {file = "tokenizers-0.15.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:22bf28f299c4158e6d0b5eaebddfd500c4973d947ffeaca8bcbe2e8c137dff0b"}, + {file = "tokenizers-0.15.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:506555f98361db9c74e1323a862d77dcd7d64c2058829a368bf4159d986e339f"}, + {file = "tokenizers-0.15.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7061b0a28ade15906f5b2ec8c48d3bdd6e24eca6b427979af34954fbe31d5cef"}, + {file = "tokenizers-0.15.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:7ed5e35507b7a0e2aac3285c4f5e37d4ec5cfc0e5825b862b68a0aaf2757af52"}, + {file = "tokenizers-0.15.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:1c9df9247df0de6509dd751b1c086e5f124b220133b5c883bb691cb6fb3d786f"}, + {file = "tokenizers-0.15.1-cp310-none-win32.whl", hash = "sha256:dd999af1b4848bef1b11d289f04edaf189c269d5e6afa7a95fa1058644c3f021"}, + {file = "tokenizers-0.15.1-cp310-none-win_amd64.whl", hash = "sha256:39d06a57f7c06940d602fad98702cf7024c4eee7f6b9fe76b9f2197d5a4cc7e2"}, + {file = "tokenizers-0.15.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:8ad034eb48bf728af06915e9294871f72fcc5254911eddec81d6df8dba1ce055"}, + {file = "tokenizers-0.15.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ea9ede7c42f8fa90f31bfc40376fd91a7d83a4aa6ad38e6076de961d48585b26"}, + {file = "tokenizers-0.15.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:b85d6fe1a20d903877aa0ef32ef6b96e81e0e48b71c206d6046ce16094de6970"}, + {file = "tokenizers-0.15.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6a7d44f656320137c7d643b9c7dcc1814763385de737fb98fd2643880910f597"}, + {file = "tokenizers-0.15.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:bd244bd0793cdacf27ee65ec3db88c21f5815460e8872bbeb32b040469d6774e"}, + {file = "tokenizers-0.15.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0f3f4a36e371b3cb1123adac8aeeeeab207ad32f15ed686d9d71686a093bb140"}, + {file = "tokenizers-0.15.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c2921a53966afb29444da98d56a6ccbef23feb3b0c0f294b4e502370a0a64f25"}, + {file = "tokenizers-0.15.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f49068cf51f49c231067f1a8c9fc075ff960573f6b2a956e8e1b0154fb638ea5"}, + {file = "tokenizers-0.15.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0ab1a22f20eaaab832ab3b00a0709ca44a0eb04721e580277579411b622c741c"}, + {file = "tokenizers-0.15.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:671268f24b607c4adc6fa2b5b580fd4211b9f84b16bd7f46d62f8e5be0aa7ba4"}, + {file = "tokenizers-0.15.1-cp311-none-win32.whl", hash = "sha256:a4f03e33d2bf7df39c8894032aba599bf90f6f6378e683a19d28871f09bb07fc"}, + {file = "tokenizers-0.15.1-cp311-none-win_amd64.whl", hash = "sha256:30f689537bcc7576d8bd4daeeaa2cb8f36446ba2f13f421b173e88f2d8289c4e"}, + {file = "tokenizers-0.15.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:0f3a379dd0898a82ea3125e8f9c481373f73bffce6430d4315f0b6cd5547e409"}, + {file = "tokenizers-0.15.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7d870ae58bba347d38ac3fc8b1f662f51e9c95272d776dd89f30035c83ee0a4f"}, + {file = "tokenizers-0.15.1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:d6d28e0143ec2e253a8a39e94bf1d24776dbe73804fa748675dbffff4a5cd6d8"}, + {file = "tokenizers-0.15.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:61ae9ac9f44e2da128ee35db69489883b522f7abe033733fa54eb2de30dac23d"}, + {file = "tokenizers-0.15.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d8e322a47e29128300b3f7749a03c0ec2bce0a3dc8539ebff738d3f59e233542"}, + {file = 
"tokenizers-0.15.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:760334f475443bc13907b1a8e1cb0aeaf88aae489062546f9704dce6c498bfe2"}, + {file = "tokenizers-0.15.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1b173753d4aca1e7d0d4cb52b5e3ffecfb0ca014e070e40391b6bb4c1d6af3f2"}, + {file = "tokenizers-0.15.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:82c1f13d457c8f0ab17e32e787d03470067fe8a3b4d012e7cc57cb3264529f4a"}, + {file = "tokenizers-0.15.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:425b46ceff4505f20191df54b50ac818055d9d55023d58ae32a5d895b6f15bb0"}, + {file = "tokenizers-0.15.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:681ac6ba3b4fdaf868ead8971221a061f580961c386e9732ea54d46c7b72f286"}, + {file = "tokenizers-0.15.1-cp312-none-win32.whl", hash = "sha256:f2272656063ccfba2044df2115095223960d80525d208e7a32f6c01c351a6f4a"}, + {file = "tokenizers-0.15.1-cp312-none-win_amd64.whl", hash = "sha256:9abe103203b1c6a2435d248d5ff4cceebcf46771bfbc4957a98a74da6ed37674"}, + {file = "tokenizers-0.15.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:2ce9ed5c8ef26b026a66110e3c7b73d93ec2d26a0b1d0ea55ddce61c0e5f446f"}, + {file = "tokenizers-0.15.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:89b24d366137986c3647baac29ef902d2d5445003d11c30df52f1bd304689aeb"}, + {file = "tokenizers-0.15.1-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:0faebedd01b413ab777ca0ee85914ed8b031ea5762ab0ea60b707ce8b9be6842"}, + {file = "tokenizers-0.15.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cdbd9dfcdad4f3b95d801f768e143165165055c18e44ca79a8a26de889cd8e85"}, + {file = "tokenizers-0.15.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:97194324c12565b07e9993ca9aa813b939541185682e859fb45bb8d7d99b3193"}, + {file = "tokenizers-0.15.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:485e43e2cc159580e0d83fc919ec3a45ae279097f634b1ffe371869ffda5802c"}, + {file = "tokenizers-0.15.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:191d084d60e3589d6420caeb3f9966168269315f8ec7fbc3883122dc9d99759d"}, + {file = "tokenizers-0.15.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:01c28cc8d7220634a75b14c53f4fc9d1b485f99a5a29306a999c115921de2897"}, + {file = "tokenizers-0.15.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:325212027745d3f8d5d5006bb9e5409d674eb80a184f19873f4f83494e1fdd26"}, + {file = "tokenizers-0.15.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:3c5573603c36ce12dbe318bcfb490a94cad2d250f34deb2f06cb6937957bbb71"}, + {file = "tokenizers-0.15.1-cp37-cp37m-macosx_10_12_x86_64.whl", hash = "sha256:1441161adb6d71a15a630d5c1d8659d5ebe41b6b209586fbeea64738e58fcbb2"}, + {file = "tokenizers-0.15.1-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:382a8d0c31afcfb86571afbfefa37186df90865ce3f5b731842dab4460e53a38"}, + {file = "tokenizers-0.15.1-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:e76959783e3f4ec73b3f3d24d4eec5aa9225f0bee565c48e77f806ed1e048f12"}, + {file = "tokenizers-0.15.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:401df223e5eb927c5961a0fc6b171818a2bba01fb36ef18c3e1b69b8cd80e591"}, + {file = "tokenizers-0.15.1-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c52606c233c759561a16e81b2290a7738c3affac7a0b1f0a16fe58dc22e04c7d"}, + {file = "tokenizers-0.15.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b72c658bbe5a05ed8bc2ac5ad782385bfd743ffa4bc87d9b5026341e709c6f44"}, + {file = "tokenizers-0.15.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:25f5643a2f005c42f0737a326c6c6bdfedfdc9a994b10a1923d9c3e792e4d6a6"}, + {file = "tokenizers-0.15.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:8c5b6f633999d6b42466bbfe21be2e26ad1760b6f106967a591a41d8cbca980e"}, + {file = "tokenizers-0.15.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:ceb5c9ad11a015150b545c1a11210966a45b8c3d68a942e57cf8938c578a77ca"}, + {file = "tokenizers-0.15.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:bedd4ce0c4872db193444c395b11c7697260ce86a635ab6d48102d76be07d324"}, + {file = "tokenizers-0.15.1-cp37-none-win32.whl", hash = "sha256:cd6caef6c14f5ed6d35f0ddb78eab8ca6306d0cd9870330bccff72ad014a6f42"}, + {file = "tokenizers-0.15.1-cp37-none-win_amd64.whl", hash = "sha256:d2bd7af78f58d75a55e5df61efae164ab9200c04b76025f9cc6eeb7aff3219c2"}, + {file = "tokenizers-0.15.1-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:59b3ca6c02e0bd5704caee274978bd055de2dff2e2f39dadf536c21032dfd432"}, + {file = "tokenizers-0.15.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:48fe21b67c22583bed71933a025fd66b1f5cfae1baefa423c3d40379b5a6e74e"}, + {file = "tokenizers-0.15.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:3d190254c66a20fb1efbdf035e6333c5e1f1c73b1f7bfad88f9c31908ac2c2c4"}, + {file = "tokenizers-0.15.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fef90c8f5abf17d48d6635f5fd92ad258acd1d0c2d920935c8bf261782cfe7c8"}, + {file = "tokenizers-0.15.1-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fac011ef7da3357aa7eb19efeecf3d201ede9618f37ddedddc5eb809ea0963ca"}, + {file = "tokenizers-0.15.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:574ec5b3e71d1feda6b0ecac0e0445875729b4899806efbe2b329909ec75cb50"}, + {file = "tokenizers-0.15.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:aca16c3c0637c051a59ea99c4253f16fbb43034fac849076a7e7913b2b9afd2d"}, + {file = "tokenizers-0.15.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8a6f238fc2bbfd3e12e8529980ec1624c7e5b69d4e959edb3d902f36974f725a"}, + {file = 
"tokenizers-0.15.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:587e11a26835b73c31867a728f32ca8a93c9ded4a6cd746516e68b9d51418431"}, + {file = "tokenizers-0.15.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6456e7ad397352775e2efdf68a9ec5d6524bbc4543e926eef428d36de627aed4"}, + {file = "tokenizers-0.15.1-cp38-none-win32.whl", hash = "sha256:614f0da7dd73293214bd143e6221cafd3f7790d06b799f33a987e29d057ca658"}, + {file = "tokenizers-0.15.1-cp38-none-win_amd64.whl", hash = "sha256:a4fa0a20d9f69cc2bf1cfce41aa40588598e77ec1d6f56bf0eb99769969d1ede"}, + {file = "tokenizers-0.15.1-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:8d3f18a45e0cf03ce193d5900460dc2430eec4e14c786e5d79bddba7ea19034f"}, + {file = "tokenizers-0.15.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:38dbd6c38f88ad7d5dc5d70c764415d38fe3bcd99dc81638b572d093abc54170"}, + {file = "tokenizers-0.15.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:777286b1f7e52de92aa4af49fe31046cfd32885d1bbaae918fab3bba52794c33"}, + {file = "tokenizers-0.15.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:58d4d550a3862a47dd249892d03a025e32286eb73cbd6bc887fb8fb64bc97165"}, + {file = "tokenizers-0.15.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4eda68ce0344f35042ae89220b40a0007f721776b727806b5c95497b35714bb7"}, + {file = "tokenizers-0.15.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0cd33d15f7a3a784c3b665cfe807b8de3c6779e060349bd5005bb4ae5bdcb437"}, + {file = "tokenizers-0.15.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0a1aa370f978ac0bfb50374c3a40daa93fd56d47c0c70f0c79607fdac2ccbb42"}, + {file = "tokenizers-0.15.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:241482b940340fff26a2708cb9ba383a5bb8a2996d67a0ff2c4367bf4b86cc3a"}, + {file = "tokenizers-0.15.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = 
"sha256:68f30b05f46a4d9aba88489eadd021904afe90e10a7950e28370d6e71b9db021"}, + {file = "tokenizers-0.15.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5a3c5d8025529670462b881b7b2527aacb6257398c9ec8e170070432c3ae3a82"}, + {file = "tokenizers-0.15.1-cp39-none-win32.whl", hash = "sha256:74d1827830f60a9d78da8f6d49a1fbea5422ce0eea42e2617877d23380a7efbc"}, + {file = "tokenizers-0.15.1-cp39-none-win_amd64.whl", hash = "sha256:9ff499923e4d6876d6b6a63ea84a56805eb35e91dd89b933a7aee0c56a3838c6"}, + {file = "tokenizers-0.15.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:b3aa007a0f4408f62a8471bdaa3faccad644cbf2622639f2906b4f9b5339e8b8"}, + {file = "tokenizers-0.15.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:f3d4176fa93d8b2070db8f3c70dc21106ae6624fcaaa334be6bdd3a0251e729e"}, + {file = "tokenizers-0.15.1-pp310-pypy310_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:1d0e463655ef8b2064df07bd4a445ed7f76f6da3b286b4590812587d42f80e89"}, + {file = "tokenizers-0.15.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:089138fd0351b62215c462a501bd68b8df0e213edcf99ab9efd5dba7b4cb733e"}, + {file = "tokenizers-0.15.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1e563ac628f5175ed08e950430e2580e544b3e4b606a0995bb6b52b3a3165728"}, + {file = "tokenizers-0.15.1-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:244dcc28c5fde221cb4373961b20da30097669005b122384d7f9f22752487a46"}, + {file = "tokenizers-0.15.1-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:d82951d46052dddae1369e68ff799a0e6e29befa9a0b46e387ae710fd4daefb0"}, + {file = "tokenizers-0.15.1-pp37-pypy37_pp73-macosx_10_12_x86_64.whl", hash = "sha256:7b14296bc9059849246ceb256ffbe97f8806a9b5d707e0095c22db312f4fc014"}, + {file = "tokenizers-0.15.1-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:0309357bb9b6c8d86cdf456053479d7112074b470651a997a058cd7ad1c4ea57"}, + 
{file = "tokenizers-0.15.1-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:083f06e9d8d01b70b67bcbcb7751b38b6005512cce95808be6bf34803534a7e7"}, + {file = "tokenizers-0.15.1-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:85288aea86ada579789447f0dcec108ebef8da4b450037eb4813d83e4da9371e"}, + {file = "tokenizers-0.15.1-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:385e6fcb01e8de90c1d157ae2a5338b23368d0b1c4cc25088cdca90147e35d17"}, + {file = "tokenizers-0.15.1-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:60067edfcbf7d6cd448ac47af41ec6e84377efbef7be0c06f15a7c1dd069e044"}, + {file = "tokenizers-0.15.1-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:5f7e37f89acfe237d4eaf93c3b69b0f01f407a7a5d0b5a8f06ba91943ea3cf10"}, + {file = "tokenizers-0.15.1-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:6a63a15b523d42ebc1f4028e5a568013388c2aefa4053a263e511cb10aaa02f1"}, + {file = "tokenizers-0.15.1-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:2417d9e4958a6c2fbecc34c27269e74561c55d8823bf914b422e261a11fdd5fd"}, + {file = "tokenizers-0.15.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8550974bace6210e41ab04231e06408cf99ea4279e0862c02b8d47e7c2b2828"}, + {file = "tokenizers-0.15.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:194ba82129b171bcd29235a969e5859a93e491e9b0f8b2581f500f200c85cfdd"}, + {file = "tokenizers-0.15.1-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:1bfd95eef8b01e6c0805dbccc8eaf41d8c5a84f0cce72c0ab149fe76aae0bce6"}, + {file = "tokenizers-0.15.1-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:b87a15dd72f8216b03c151e3dace00c75c3fe7b0ee9643c25943f31e582f1a34"}, + {file = "tokenizers-0.15.1-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:6ac22f358a0c2a6c685be49136ce7ea7054108986ad444f567712cf274b34cd8"}, + {file = 
"tokenizers-0.15.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:1e9d1f046a9b9d9a95faa103f07db5921d2c1c50f0329ebba4359350ee02b18b"}, + {file = "tokenizers-0.15.1-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:2a0fd30a4b74485f6a7af89fffb5fb84d6d5f649b3e74f8d37f624cc9e9e97cf"}, + {file = "tokenizers-0.15.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:80e45dc206b9447fa48795a1247c69a1732d890b53e2cc51ba42bc2fefa22407"}, + {file = "tokenizers-0.15.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4eaff56ef3e218017fa1d72007184401f04cb3a289990d2b6a0a76ce71c95f96"}, + {file = "tokenizers-0.15.1-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:b41dc107e4a4e9c95934e79b025228bbdda37d9b153d8b084160e88d5e48ad6f"}, + {file = "tokenizers-0.15.1-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:1922b8582d0c33488764bcf32e80ef6054f515369e70092729c928aae2284bc2"}, + {file = "tokenizers-0.15.1.tar.gz", hash = "sha256:c0a331d6d5a3d6e97b7f99f562cee8d56797180797bc55f12070e495e717c980"}, +] + +[package.dependencies] +huggingface_hub = ">=0.16.4,<1.0" + +[package.extras] +dev = ["tokenizers[testing]"] +docs = ["setuptools_rust", "sphinx", "sphinx_rtd_theme"] +testing = ["black (==22.3)", "datasets", "numpy", "pytest", "requests"] + [[package]] name = "toml" version = "0.10.2" description = "Python Library for Tom's Obvious, Minimal Language" -category = "main" optional = true python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -5467,7 +6159,6 @@ files = [ name = "tomli" version = "2.0.1" description = "A lil' TOML parser" -category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -5479,7 +6170,6 @@ files = [ name = "tomlkit" version = "0.12.1" description = "Style preserving TOML library" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -5491,7 +6181,6 @@ files = [ name = "toolz" version = "0.12.0" 
description = "List processing tools and functional utilities" -category = "main" optional = true python-versions = ">=3.5" files = [ @@ -5503,7 +6192,6 @@ files = [ name = "tornado" version = "6.3.3" description = "Tornado is a Python web framework and asynchronous networking library, originally developed at FriendFeed." -category = "main" optional = true python-versions = ">= 3.8" files = [ @@ -5524,7 +6212,6 @@ files = [ name = "tqdm" version = "4.66.1" description = "Fast, Extensible Progress Meter" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -5545,7 +6232,6 @@ telegram = ["requests"] name = "traitlets" version = "5.13.0" description = "Traitlets Python configuration system" -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -5557,11 +6243,31 @@ files = [ docs = ["myst-parser", "pydata-sphinx-theme", "sphinx"] test = ["argcomplete (>=3.0.3)", "mypy (>=1.6.0)", "pre-commit", "pytest (>=7.0,<7.5)", "pytest-mock", "pytest-mypy-testing"] +[[package]] +name = "typer" +version = "0.9.0" +description = "Typer, build great CLIs. Easy to code. Based on Python type hints." 
+optional = true +python-versions = ">=3.6" +files = [ + {file = "typer-0.9.0-py3-none-any.whl", hash = "sha256:5d96d986a21493606a358cae4461bd8cdf83cbf33a5aa950ae629ca3b51467ee"}, + {file = "typer-0.9.0.tar.gz", hash = "sha256:50922fd79aea2f4751a8e0408ff10d2662bd0c8bbfa84755a699f3bada2978b2"}, +] + +[package.dependencies] +click = ">=7.1.1,<9.0.0" +typing-extensions = ">=3.7.4.3" + +[package.extras] +all = ["colorama (>=0.4.3,<0.5.0)", "rich (>=10.11.0,<14.0.0)", "shellingham (>=1.3.0,<2.0.0)"] +dev = ["autoflake (>=1.3.1,<2.0.0)", "flake8 (>=3.8.3,<4.0.0)", "pre-commit (>=2.17.0,<3.0.0)"] +doc = ["cairosvg (>=2.5.2,<3.0.0)", "mdx-include (>=1.4.1,<2.0.0)", "mkdocs (>=1.1.2,<2.0.0)", "mkdocs-material (>=8.1.4,<9.0.0)", "pillow (>=9.3.0,<10.0.0)"] +test = ["black (>=22.3.0,<23.0.0)", "coverage (>=6.2,<7.0)", "isort (>=5.0.6,<6.0.0)", "mypy (==0.910)", "pytest (>=4.4.0,<8.0.0)", "pytest-cov (>=2.10.0,<5.0.0)", "pytest-sugar (>=0.9.4,<0.10.0)", "pytest-xdist (>=1.32.0,<4.0.0)", "rich (>=10.11.0,<14.0.0)", "shellingham (>=1.3.0,<2.0.0)"] + [[package]] name = "typing-extensions" version = "4.8.0" description = "Backported and Experimental Type Hints for Python 3.8+" -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -5573,7 +6279,6 @@ files = [ name = "typing-inspect" version = "0.9.0" description = "Runtime inspection utilities for typing module." 
-category = "main" optional = true python-versions = "*" files = [ @@ -5589,7 +6294,6 @@ typing-extensions = ">=3.7.4" name = "tzdata" version = "2023.3" description = "Provider of IANA time zone data" -category = "main" optional = true python-versions = ">=2" files = [ @@ -5601,7 +6305,6 @@ files = [ name = "tzlocal" version = "5.2" description = "tzinfo object for the local timezone" -category = "main" optional = true python-versions = ">=3.8" files = [ @@ -5619,7 +6322,6 @@ devenv = ["check-manifest", "pytest (>=4.3)", "pytest-cov", "pytest-mock (>=3.3) name = "urllib3" version = "1.26.18" description = "HTTP library with thread-safe connection pooling, file post, and more." -category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" files = [ @@ -5632,11 +6334,80 @@ brotli = ["brotli (==1.0.9)", "brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotl secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] +[[package]] +name = "uvicorn" +version = "0.27.0" +description = "The lightning-fast ASGI server." 
+optional = true +python-versions = ">=3.8" +files = [ + {file = "uvicorn-0.27.0-py3-none-any.whl", hash = "sha256:890b00f6c537d58695d3bb1f28e23db9d9e7a17cbcc76d7457c499935f933e24"}, + {file = "uvicorn-0.27.0.tar.gz", hash = "sha256:c855578045d45625fd027367f7653d249f7c49f9361ba15cf9624186b26b8eb6"}, +] + +[package.dependencies] +click = ">=7.0" +colorama = {version = ">=0.4", optional = true, markers = "sys_platform == \"win32\" and extra == \"standard\""} +h11 = ">=0.8" +httptools = {version = ">=0.5.0", optional = true, markers = "extra == \"standard\""} +python-dotenv = {version = ">=0.13", optional = true, markers = "extra == \"standard\""} +pyyaml = {version = ">=5.1", optional = true, markers = "extra == \"standard\""} +typing-extensions = {version = ">=4.0", markers = "python_version < \"3.11\""} +uvloop = {version = ">=0.14.0,<0.15.0 || >0.15.0,<0.15.1 || >0.15.1", optional = true, markers = "(sys_platform != \"win32\" and sys_platform != \"cygwin\") and platform_python_implementation != \"PyPy\" and extra == \"standard\""} +watchfiles = {version = ">=0.13", optional = true, markers = "extra == \"standard\""} +websockets = {version = ">=10.4", optional = true, markers = "extra == \"standard\""} + +[package.extras] +standard = ["colorama (>=0.4)", "httptools (>=0.5.0)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1)", "watchfiles (>=0.13)", "websockets (>=10.4)"] + +[[package]] +name = "uvloop" +version = "0.19.0" +description = "Fast implementation of asyncio event loop on top of libuv" +optional = true +python-versions = ">=3.8.0" +files = [ + {file = "uvloop-0.19.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:de4313d7f575474c8f5a12e163f6d89c0a878bc49219641d49e6f1444369a90e"}, + {file = "uvloop-0.19.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5588bd21cf1fcf06bded085f37e43ce0e00424197e7c10e77afd4bbefffef428"}, + {file = "uvloop-0.19.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:7b1fd71c3843327f3bbc3237bedcdb6504fd50368ab3e04d0410e52ec293f5b8"}, + {file = "uvloop-0.19.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a05128d315e2912791de6088c34136bfcdd0c7cbc1cf85fd6fd1bb321b7c849"}, + {file = "uvloop-0.19.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:cd81bdc2b8219cb4b2556eea39d2e36bfa375a2dd021404f90a62e44efaaf957"}, + {file = "uvloop-0.19.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:5f17766fb6da94135526273080f3455a112f82570b2ee5daa64d682387fe0dcd"}, + {file = "uvloop-0.19.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:4ce6b0af8f2729a02a5d1575feacb2a94fc7b2e983868b009d51c9a9d2149bef"}, + {file = "uvloop-0.19.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:31e672bb38b45abc4f26e273be83b72a0d28d074d5b370fc4dcf4c4eb15417d2"}, + {file = "uvloop-0.19.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:570fc0ed613883d8d30ee40397b79207eedd2624891692471808a95069a007c1"}, + {file = "uvloop-0.19.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5138821e40b0c3e6c9478643b4660bd44372ae1e16a322b8fc07478f92684e24"}, + {file = "uvloop-0.19.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:91ab01c6cd00e39cde50173ba4ec68a1e578fee9279ba64f5221810a9e786533"}, + {file = "uvloop-0.19.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:47bf3e9312f63684efe283f7342afb414eea4d3011542155c7e625cd799c3b12"}, + {file = "uvloop-0.19.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:da8435a3bd498419ee8c13c34b89b5005130a476bda1d6ca8cfdde3de35cd650"}, + {file = "uvloop-0.19.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:02506dc23a5d90e04d4f65c7791e65cf44bd91b37f24cfc3ef6cf2aff05dc7ec"}, + {file = "uvloop-0.19.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2693049be9d36fef81741fddb3f441673ba12a34a704e7b4361efb75cf30befc"}, + {file = 
"uvloop-0.19.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7010271303961c6f0fe37731004335401eb9075a12680738731e9c92ddd96ad6"}, + {file = "uvloop-0.19.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:5daa304d2161d2918fa9a17d5635099a2f78ae5b5960e742b2fcfbb7aefaa593"}, + {file = "uvloop-0.19.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:7207272c9520203fea9b93843bb775d03e1cf88a80a936ce760f60bb5add92f3"}, + {file = "uvloop-0.19.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:78ab247f0b5671cc887c31d33f9b3abfb88d2614b84e4303f1a63b46c046c8bd"}, + {file = "uvloop-0.19.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:472d61143059c84947aa8bb74eabbace30d577a03a1805b77933d6bd13ddebbd"}, + {file = "uvloop-0.19.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45bf4c24c19fb8a50902ae37c5de50da81de4922af65baf760f7c0c42e1088be"}, + {file = "uvloop-0.19.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:271718e26b3e17906b28b67314c45d19106112067205119dddbd834c2b7ce797"}, + {file = "uvloop-0.19.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:34175c9fd2a4bc3adc1380e1261f60306344e3407c20a4d684fd5f3be010fa3d"}, + {file = "uvloop-0.19.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:e27f100e1ff17f6feeb1f33968bc185bf8ce41ca557deee9d9bbbffeb72030b7"}, + {file = "uvloop-0.19.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:13dfdf492af0aa0a0edf66807d2b465607d11c4fa48f4a1fd41cbea5b18e8e8b"}, + {file = "uvloop-0.19.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6e3d4e85ac060e2342ff85e90d0c04157acb210b9ce508e784a944f852a40e67"}, + {file = "uvloop-0.19.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8ca4956c9ab567d87d59d49fa3704cf29e37109ad348f2d5223c9bf761a332e7"}, + {file = "uvloop-0.19.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f467a5fd23b4fc43ed86342641f3936a68ded707f4627622fa3f82a120e18256"}, + 
{file = "uvloop-0.19.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:492e2c32c2af3f971473bc22f086513cedfc66a130756145a931a90c3958cb17"}, + {file = "uvloop-0.19.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:2df95fca285a9f5bfe730e51945ffe2fa71ccbfdde3b0da5772b4ee4f2e770d5"}, + {file = "uvloop-0.19.0.tar.gz", hash = "sha256:0246f4fd1bf2bf702e06b0d45ee91677ee5c31242f39aab4ea6fe0c51aedd0fd"}, +] + +[package.extras] +docs = ["Sphinx (>=4.1.2,<4.2.0)", "sphinx-rtd-theme (>=0.5.2,<0.6.0)", "sphinxcontrib-asyncio (>=0.3.0,<0.4.0)"] +test = ["Cython (>=0.29.36,<0.30.0)", "aiohttp (==3.9.0b0)", "aiohttp (>=3.8.1)", "flake8 (>=5.0,<6.0)", "mypy (>=0.800)", "psutil", "pyOpenSSL (>=23.0.0,<23.1.0)", "pycodestyle (>=2.9.0,<2.10.0)"] + [[package]] name = "validators" version = "0.22.0" description = "Python Data Validation for Humans™" -category = "main" optional = true python-versions = ">=3.8" files = [ @@ -5659,7 +6430,6 @@ tooling-extras = ["pyaml (>=23.7.0)", "pypandoc-binary (>=1.11)", "pytest (>=7.4 name = "virtualenv" version = "20.24.6" description = "Virtual Python Environment builder" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -5680,7 +6450,6 @@ test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess name = "watchdog" version = "3.0.0" description = "Filesystem events monitoring" -category = "main" optional = false python-versions = ">=3.7" files = [ @@ -5716,11 +6485,97 @@ files = [ [package.extras] watchmedo = ["PyYAML (>=3.10)"] +[[package]] +name = "watchfiles" +version = "0.21.0" +description = "Simple, modern and high performance file watching and code reload in python." 
+optional = true +python-versions = ">=3.8" +files = [ + {file = "watchfiles-0.21.0-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:27b4035013f1ea49c6c0b42d983133b136637a527e48c132d368eb19bf1ac6aa"}, + {file = "watchfiles-0.21.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c81818595eff6e92535ff32825f31c116f867f64ff8cdf6562cd1d6b2e1e8f3e"}, + {file = "watchfiles-0.21.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:6c107ea3cf2bd07199d66f156e3ea756d1b84dfd43b542b2d870b77868c98c03"}, + {file = "watchfiles-0.21.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d9ac347653ebd95839a7c607608703b20bc07e577e870d824fa4801bc1cb124"}, + {file = "watchfiles-0.21.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5eb86c6acb498208e7663ca22dbe68ca2cf42ab5bf1c776670a50919a56e64ab"}, + {file = "watchfiles-0.21.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f564bf68404144ea6b87a78a3f910cc8de216c6b12a4cf0b27718bf4ec38d303"}, + {file = "watchfiles-0.21.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3d0f32ebfaa9c6011f8454994f86108c2eb9c79b8b7de00b36d558cadcedaa3d"}, + {file = "watchfiles-0.21.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b6d45d9b699ecbac6c7bd8e0a2609767491540403610962968d258fd6405c17c"}, + {file = "watchfiles-0.21.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:aff06b2cac3ef4616e26ba17a9c250c1fe9dd8a5d907d0193f84c499b1b6e6a9"}, + {file = "watchfiles-0.21.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:d9792dff410f266051025ecfaa927078b94cc7478954b06796a9756ccc7e14a9"}, + {file = "watchfiles-0.21.0-cp310-none-win32.whl", hash = "sha256:214cee7f9e09150d4fb42e24919a1e74d8c9b8a9306ed1474ecaddcd5479c293"}, + {file = "watchfiles-0.21.0-cp310-none-win_amd64.whl", hash = "sha256:1ad7247d79f9f55bb25ab1778fd47f32d70cf36053941f07de0b7c4e96b5d235"}, + {file = 
"watchfiles-0.21.0-cp311-cp311-macosx_10_7_x86_64.whl", hash = "sha256:668c265d90de8ae914f860d3eeb164534ba2e836811f91fecc7050416ee70aa7"}, + {file = "watchfiles-0.21.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3a23092a992e61c3a6a70f350a56db7197242f3490da9c87b500f389b2d01eef"}, + {file = "watchfiles-0.21.0-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:e7941bbcfdded9c26b0bf720cb7e6fd803d95a55d2c14b4bd1f6a2772230c586"}, + {file = "watchfiles-0.21.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:11cd0c3100e2233e9c53106265da31d574355c288e15259c0d40a4405cbae317"}, + {file = "watchfiles-0.21.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d78f30cbe8b2ce770160d3c08cff01b2ae9306fe66ce899b73f0409dc1846c1b"}, + {file = "watchfiles-0.21.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6674b00b9756b0af620aa2a3346b01f8e2a3dc729d25617e1b89cf6af4a54eb1"}, + {file = "watchfiles-0.21.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fd7ac678b92b29ba630d8c842d8ad6c555abda1b9ef044d6cc092dacbfc9719d"}, + {file = "watchfiles-0.21.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c873345680c1b87f1e09e0eaf8cf6c891b9851d8b4d3645e7efe2ec20a20cc7"}, + {file = "watchfiles-0.21.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:49f56e6ecc2503e7dbe233fa328b2be1a7797d31548e7a193237dcdf1ad0eee0"}, + {file = "watchfiles-0.21.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:02d91cbac553a3ad141db016e3350b03184deaafeba09b9d6439826ee594b365"}, + {file = "watchfiles-0.21.0-cp311-none-win32.whl", hash = "sha256:ebe684d7d26239e23d102a2bad2a358dedf18e462e8808778703427d1f584400"}, + {file = "watchfiles-0.21.0-cp311-none-win_amd64.whl", hash = "sha256:4566006aa44cb0d21b8ab53baf4b9c667a0ed23efe4aaad8c227bfba0bf15cbe"}, + {file = "watchfiles-0.21.0-cp311-none-win_arm64.whl", hash = 
"sha256:c550a56bf209a3d987d5a975cdf2063b3389a5d16caf29db4bdddeae49f22078"}, + {file = "watchfiles-0.21.0-cp312-cp312-macosx_10_7_x86_64.whl", hash = "sha256:51ddac60b96a42c15d24fbdc7a4bfcd02b5a29c047b7f8bf63d3f6f5a860949a"}, + {file = "watchfiles-0.21.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:511f0b034120cd1989932bf1e9081aa9fb00f1f949fbd2d9cab6264916ae89b1"}, + {file = "watchfiles-0.21.0-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:cfb92d49dbb95ec7a07511bc9efb0faff8fe24ef3805662b8d6808ba8409a71a"}, + {file = "watchfiles-0.21.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3f92944efc564867bbf841c823c8b71bb0be75e06b8ce45c084b46411475a915"}, + {file = "watchfiles-0.21.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:642d66b75eda909fd1112d35c53816d59789a4b38c141a96d62f50a3ef9b3360"}, + {file = "watchfiles-0.21.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d23bcd6c8eaa6324fe109d8cac01b41fe9a54b8c498af9ce464c1aeeb99903d6"}, + {file = "watchfiles-0.21.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:18d5b4da8cf3e41895b34e8c37d13c9ed294954907929aacd95153508d5d89d7"}, + {file = "watchfiles-0.21.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1b8d1eae0f65441963d805f766c7e9cd092f91e0c600c820c764a4ff71a0764c"}, + {file = "watchfiles-0.21.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:1fd9a5205139f3c6bb60d11f6072e0552f0a20b712c85f43d42342d162be1235"}, + {file = "watchfiles-0.21.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a1e3014a625bcf107fbf38eece0e47fa0190e52e45dc6eee5a8265ddc6dc5ea7"}, + {file = "watchfiles-0.21.0-cp312-none-win32.whl", hash = "sha256:9d09869f2c5a6f2d9df50ce3064b3391d3ecb6dced708ad64467b9e4f2c9bef3"}, + {file = "watchfiles-0.21.0-cp312-none-win_amd64.whl", hash = "sha256:18722b50783b5e30a18a8a5db3006bab146d2b705c92eb9a94f78c72beb94094"}, + {file = 
"watchfiles-0.21.0-cp312-none-win_arm64.whl", hash = "sha256:a3b9bec9579a15fb3ca2d9878deae789df72f2b0fdaf90ad49ee389cad5edab6"}, + {file = "watchfiles-0.21.0-cp38-cp38-macosx_10_7_x86_64.whl", hash = "sha256:4ea10a29aa5de67de02256a28d1bf53d21322295cb00bd2d57fcd19b850ebd99"}, + {file = "watchfiles-0.21.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:40bca549fdc929b470dd1dbfcb47b3295cb46a6d2c90e50588b0a1b3bd98f429"}, + {file = "watchfiles-0.21.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:9b37a7ba223b2f26122c148bb8d09a9ff312afca998c48c725ff5a0a632145f7"}, + {file = "watchfiles-0.21.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ec8c8900dc5c83650a63dd48c4d1d245343f904c4b64b48798c67a3767d7e165"}, + {file = "watchfiles-0.21.0-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8ad3fe0a3567c2f0f629d800409cd528cb6251da12e81a1f765e5c5345fd0137"}, + {file = "watchfiles-0.21.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9d353c4cfda586db2a176ce42c88f2fc31ec25e50212650c89fdd0f560ee507b"}, + {file = "watchfiles-0.21.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:83a696da8922314ff2aec02987eefb03784f473281d740bf9170181829133765"}, + {file = "watchfiles-0.21.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a03651352fc20975ee2a707cd2d74a386cd303cc688f407296064ad1e6d1562"}, + {file = "watchfiles-0.21.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:3ad692bc7792be8c32918c699638b660c0de078a6cbe464c46e1340dadb94c19"}, + {file = "watchfiles-0.21.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:06247538e8253975bdb328e7683f8515ff5ff041f43be6c40bff62d989b7d0b0"}, + {file = "watchfiles-0.21.0-cp38-none-win32.whl", hash = "sha256:9a0aa47f94ea9a0b39dd30850b0adf2e1cd32a8b4f9c7aa443d852aacf9ca214"}, + {file = "watchfiles-0.21.0-cp38-none-win_amd64.whl", hash = 
"sha256:8d5f400326840934e3507701f9f7269247f7c026d1b6cfd49477d2be0933cfca"}, + {file = "watchfiles-0.21.0-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:7f762a1a85a12cc3484f77eee7be87b10f8c50b0b787bb02f4e357403cad0c0e"}, + {file = "watchfiles-0.21.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:6e9be3ef84e2bb9710f3f777accce25556f4a71e15d2b73223788d528fcc2052"}, + {file = "watchfiles-0.21.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:4c48a10d17571d1275701e14a601e36959ffada3add8cdbc9e5061a6e3579a5d"}, + {file = "watchfiles-0.21.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c889025f59884423428c261f212e04d438de865beda0b1e1babab85ef4c0f01"}, + {file = "watchfiles-0.21.0-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:66fac0c238ab9a2e72d026b5fb91cb902c146202bbd29a9a1a44e8db7b710b6f"}, + {file = "watchfiles-0.21.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b4a21f71885aa2744719459951819e7bf5a906a6448a6b2bbce8e9cc9f2c8128"}, + {file = "watchfiles-0.21.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1c9198c989f47898b2c22201756f73249de3748e0fc9de44adaf54a8b259cc0c"}, + {file = "watchfiles-0.21.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d8f57c4461cd24fda22493109c45b3980863c58a25b8bec885ca8bea6b8d4b28"}, + {file = "watchfiles-0.21.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:853853cbf7bf9408b404754b92512ebe3e3a83587503d766d23e6bf83d092ee6"}, + {file = "watchfiles-0.21.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d5b1dc0e708fad9f92c296ab2f948af403bf201db8fb2eb4c8179db143732e49"}, + {file = "watchfiles-0.21.0-cp39-none-win32.whl", hash = "sha256:59137c0c6826bd56c710d1d2bda81553b5e6b7c84d5a676747d80caf0409ad94"}, + {file = "watchfiles-0.21.0-cp39-none-win_amd64.whl", hash = "sha256:6cb8fdc044909e2078c248986f2fc76f911f72b51ea4a4fbbf472e01d14faa58"}, + {file = 
"watchfiles-0.21.0-pp310-pypy310_pp73-macosx_10_7_x86_64.whl", hash = "sha256:ab03a90b305d2588e8352168e8c5a1520b721d2d367f31e9332c4235b30b8994"}, + {file = "watchfiles-0.21.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:927c589500f9f41e370b0125c12ac9e7d3a2fd166b89e9ee2828b3dda20bfe6f"}, + {file = "watchfiles-0.21.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1bd467213195e76f838caf2c28cd65e58302d0254e636e7c0fca81efa4a2e62c"}, + {file = "watchfiles-0.21.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:02b73130687bc3f6bb79d8a170959042eb56eb3a42df3671c79b428cd73f17cc"}, + {file = "watchfiles-0.21.0-pp38-pypy38_pp73-macosx_10_7_x86_64.whl", hash = "sha256:08dca260e85ffae975448e344834d765983237ad6dc308231aa16e7933db763e"}, + {file = "watchfiles-0.21.0-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:3ccceb50c611c433145502735e0370877cced72a6c70fd2410238bcbc7fe51d8"}, + {file = "watchfiles-0.21.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:57d430f5fb63fea141ab71ca9c064e80de3a20b427ca2febcbfcef70ff0ce895"}, + {file = "watchfiles-0.21.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0dd5fad9b9c0dd89904bbdea978ce89a2b692a7ee8a0ce19b940e538c88a809c"}, + {file = "watchfiles-0.21.0-pp39-pypy39_pp73-macosx_10_7_x86_64.whl", hash = "sha256:be6dd5d52b73018b21adc1c5d28ac0c68184a64769052dfeb0c5d9998e7f56a2"}, + {file = "watchfiles-0.21.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:b3cab0e06143768499384a8a5efb9c4dc53e19382952859e4802f294214f36ec"}, + {file = "watchfiles-0.21.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c6ed10c2497e5fedadf61e465b3ca12a19f96004c15dcffe4bd442ebadc2d85"}, + {file = "watchfiles-0.21.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:43babacef21c519bc6631c5fce2a61eccdfc011b4bcb9047255e9620732c8097"}, + 
{file = "watchfiles-0.21.0.tar.gz", hash = "sha256:c76c635fabf542bb78524905718c39f736a98e5ab25b23ec6d4abede1a85a6a3"}, +] + +[package.dependencies] +anyio = ">=3.0.0" + [[package]] name = "wcwidth" version = "0.2.9" description = "Measures the displayed width of unicode strings in a terminal" -category = "main" optional = false python-versions = "*" files = [ @@ -5732,7 +6587,6 @@ files = [ name = "webencodings" version = "0.5.1" description = "Character encoding aliases for legacy web content" -category = "main" optional = true python-versions = "*" files = [ @@ -5740,11 +6594,107 @@ files = [ {file = "webencodings-0.5.1.tar.gz", hash = "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923"}, ] +[[package]] +name = "websocket-client" +version = "1.7.0" +description = "WebSocket client for Python with low level API options" +optional = true +python-versions = ">=3.8" +files = [ + {file = "websocket-client-1.7.0.tar.gz", hash = "sha256:10e511ea3a8c744631d3bd77e61eb17ed09304c413ad42cf6ddfa4c7787e8fe6"}, + {file = "websocket_client-1.7.0-py3-none-any.whl", hash = "sha256:f4c3d22fec12a2461427a29957ff07d35098ee2d976d3ba244e688b8b4057588"}, +] + +[package.extras] +docs = ["Sphinx (>=6.0)", "sphinx-rtd-theme (>=1.1.0)"] +optional = ["python-socks", "wsaccel"] +test = ["websockets"] + +[[package]] +name = "websockets" +version = "12.0" +description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)" +optional = true +python-versions = ">=3.8" +files = [ + {file = "websockets-12.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d554236b2a2006e0ce16315c16eaa0d628dab009c33b63ea03f41c6107958374"}, + {file = "websockets-12.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2d225bb6886591b1746b17c0573e29804619c8f755b5598d875bb4235ea639be"}, + {file = "websockets-12.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:eb809e816916a3b210bed3c82fb88eaf16e8afcf9c115ebb2bacede1797d2547"}, + {file = 
"websockets-12.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c588f6abc13f78a67044c6b1273a99e1cf31038ad51815b3b016ce699f0d75c2"}, + {file = "websockets-12.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5aa9348186d79a5f232115ed3fa9020eab66d6c3437d72f9d2c8ac0c6858c558"}, + {file = "websockets-12.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6350b14a40c95ddd53e775dbdbbbc59b124a5c8ecd6fbb09c2e52029f7a9f480"}, + {file = "websockets-12.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:70ec754cc2a769bcd218ed8d7209055667b30860ffecb8633a834dde27d6307c"}, + {file = "websockets-12.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:6e96f5ed1b83a8ddb07909b45bd94833b0710f738115751cdaa9da1fb0cb66e8"}, + {file = "websockets-12.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4d87be612cbef86f994178d5186add3d94e9f31cc3cb499a0482b866ec477603"}, + {file = "websockets-12.0-cp310-cp310-win32.whl", hash = "sha256:befe90632d66caaf72e8b2ed4d7f02b348913813c8b0a32fae1cc5fe3730902f"}, + {file = "websockets-12.0-cp310-cp310-win_amd64.whl", hash = "sha256:363f57ca8bc8576195d0540c648aa58ac18cf85b76ad5202b9f976918f4219cf"}, + {file = "websockets-12.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5d873c7de42dea355d73f170be0f23788cf3fa9f7bed718fd2830eefedce01b4"}, + {file = "websockets-12.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3f61726cae9f65b872502ff3c1496abc93ffbe31b278455c418492016e2afc8f"}, + {file = "websockets-12.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ed2fcf7a07334c77fc8a230755c2209223a7cc44fc27597729b8ef5425aa61a3"}, + {file = "websockets-12.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e332c210b14b57904869ca9f9bf4ca32f5427a03eeb625da9b616c85a3a506c"}, + {file = 
"websockets-12.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5693ef74233122f8ebab026817b1b37fe25c411ecfca084b29bc7d6efc548f45"}, + {file = "websockets-12.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e9e7db18b4539a29cc5ad8c8b252738a30e2b13f033c2d6e9d0549b45841c04"}, + {file = "websockets-12.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6e2df67b8014767d0f785baa98393725739287684b9f8d8a1001eb2839031447"}, + {file = "websockets-12.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:bea88d71630c5900690fcb03161ab18f8f244805c59e2e0dc4ffadae0a7ee0ca"}, + {file = "websockets-12.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:dff6cdf35e31d1315790149fee351f9e52978130cef6c87c4b6c9b3baf78bc53"}, + {file = "websockets-12.0-cp311-cp311-win32.whl", hash = "sha256:3e3aa8c468af01d70332a382350ee95f6986db479ce7af14d5e81ec52aa2b402"}, + {file = "websockets-12.0-cp311-cp311-win_amd64.whl", hash = "sha256:25eb766c8ad27da0f79420b2af4b85d29914ba0edf69f547cc4f06ca6f1d403b"}, + {file = "websockets-12.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0e6e2711d5a8e6e482cacb927a49a3d432345dfe7dea8ace7b5790df5932e4df"}, + {file = "websockets-12.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:dbcf72a37f0b3316e993e13ecf32f10c0e1259c28ffd0a85cee26e8549595fbc"}, + {file = "websockets-12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:12743ab88ab2af1d17dd4acb4645677cb7063ef4db93abffbf164218a5d54c6b"}, + {file = "websockets-12.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7b645f491f3c48d3f8a00d1fce07445fab7347fec54a3e65f0725d730d5b99cb"}, + {file = "websockets-12.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9893d1aa45a7f8b3bc4510f6ccf8db8c3b62120917af15e3de247f0780294b92"}, + {file = 
"websockets-12.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f38a7b376117ef7aff996e737583172bdf535932c9ca021746573bce40165ed"}, + {file = "websockets-12.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:f764ba54e33daf20e167915edc443b6f88956f37fb606449b4a5b10ba42235a5"}, + {file = "websockets-12.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:1e4b3f8ea6a9cfa8be8484c9221ec0257508e3a1ec43c36acdefb2a9c3b00aa2"}, + {file = "websockets-12.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:9fdf06fd06c32205a07e47328ab49c40fc1407cdec801d698a7c41167ea45113"}, + {file = "websockets-12.0-cp312-cp312-win32.whl", hash = "sha256:baa386875b70cbd81798fa9f71be689c1bf484f65fd6fb08d051a0ee4e79924d"}, + {file = "websockets-12.0-cp312-cp312-win_amd64.whl", hash = "sha256:ae0a5da8f35a5be197f328d4727dbcfafa53d1824fac3d96cdd3a642fe09394f"}, + {file = "websockets-12.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5f6ffe2c6598f7f7207eef9a1228b6f5c818f9f4d53ee920aacd35cec8110438"}, + {file = "websockets-12.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9edf3fc590cc2ec20dc9d7a45108b5bbaf21c0d89f9fd3fd1685e223771dc0b2"}, + {file = "websockets-12.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:8572132c7be52632201a35f5e08348137f658e5ffd21f51f94572ca6c05ea81d"}, + {file = "websockets-12.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:604428d1b87edbf02b233e2c207d7d528460fa978f9e391bd8aaf9c8311de137"}, + {file = "websockets-12.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1a9d160fd080c6285e202327aba140fc9a0d910b09e423afff4ae5cbbf1c7205"}, + {file = "websockets-12.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87b4aafed34653e465eb77b7c93ef058516cb5acf3eb21e42f33928616172def"}, + {file = "websockets-12.0-cp38-cp38-musllinux_1_1_aarch64.whl", 
hash = "sha256:b2ee7288b85959797970114deae81ab41b731f19ebcd3bd499ae9ca0e3f1d2c8"}, + {file = "websockets-12.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:7fa3d25e81bfe6a89718e9791128398a50dec6d57faf23770787ff441d851967"}, + {file = "websockets-12.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:a571f035a47212288e3b3519944f6bf4ac7bc7553243e41eac50dd48552b6df7"}, + {file = "websockets-12.0-cp38-cp38-win32.whl", hash = "sha256:3c6cc1360c10c17463aadd29dd3af332d4a1adaa8796f6b0e9f9df1fdb0bad62"}, + {file = "websockets-12.0-cp38-cp38-win_amd64.whl", hash = "sha256:1bf386089178ea69d720f8db6199a0504a406209a0fc23e603b27b300fdd6892"}, + {file = "websockets-12.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:ab3d732ad50a4fbd04a4490ef08acd0517b6ae6b77eb967251f4c263011a990d"}, + {file = "websockets-12.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a1d9697f3337a89691e3bd8dc56dea45a6f6d975f92e7d5f773bc715c15dde28"}, + {file = "websockets-12.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1df2fbd2c8a98d38a66f5238484405b8d1d16f929bb7a33ed73e4801222a6f53"}, + {file = "websockets-12.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:23509452b3bc38e3a057382c2e941d5ac2e01e251acce7adc74011d7d8de434c"}, + {file = "websockets-12.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2e5fc14ec6ea568200ea4ef46545073da81900a2b67b3e666f04adf53ad452ec"}, + {file = "websockets-12.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46e71dbbd12850224243f5d2aeec90f0aaa0f2dde5aeeb8fc8df21e04d99eff9"}, + {file = "websockets-12.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b81f90dcc6c85a9b7f29873beb56c94c85d6f0dac2ea8b60d995bd18bf3e2aae"}, + {file = "websockets-12.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:a02413bc474feda2849c59ed2dfb2cddb4cd3d2f03a2fedec51d6e959d9b608b"}, + {file = 
"websockets-12.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:bbe6013f9f791944ed31ca08b077e26249309639313fff132bfbf3ba105673b9"}, + {file = "websockets-12.0-cp39-cp39-win32.whl", hash = "sha256:cbe83a6bbdf207ff0541de01e11904827540aa069293696dd528a6640bd6a5f6"}, + {file = "websockets-12.0-cp39-cp39-win_amd64.whl", hash = "sha256:fc4e7fa5414512b481a2483775a8e8be7803a35b30ca805afa4998a84f9fd9e8"}, + {file = "websockets-12.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:248d8e2446e13c1d4326e0a6a4e9629cb13a11195051a73acf414812700badbd"}, + {file = "websockets-12.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f44069528d45a933997a6fef143030d8ca8042f0dfaad753e2906398290e2870"}, + {file = "websockets-12.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c4e37d36f0d19f0a4413d3e18c0d03d0c268ada2061868c1e6f5ab1a6d575077"}, + {file = "websockets-12.0-pp310-pypy310_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3d829f975fc2e527a3ef2f9c8f25e553eb7bc779c6665e8e1d52aa22800bb38b"}, + {file = "websockets-12.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:2c71bd45a777433dd9113847af751aae36e448bc6b8c361a566cb043eda6ec30"}, + {file = "websockets-12.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:0bee75f400895aef54157b36ed6d3b308fcab62e5260703add87f44cee9c82a6"}, + {file = "websockets-12.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:423fc1ed29f7512fceb727e2d2aecb952c46aa34895e9ed96071821309951123"}, + {file = "websockets-12.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:27a5e9964ef509016759f2ef3f2c1e13f403725a5e6a1775555994966a66e931"}, + {file = "websockets-12.0-pp38-pypy38_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:c3181df4583c4d3994d31fb235dc681d2aaad744fbdbf94c4802485ececdecf2"}, + {file = "websockets-12.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:b067cb952ce8bf40115f6c19f478dc71c5e719b7fbaa511359795dfd9d1a6468"}, + {file = "websockets-12.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:00700340c6c7ab788f176d118775202aadea7602c5cc6be6ae127761c16d6b0b"}, + {file = "websockets-12.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e469d01137942849cff40517c97a30a93ae79917752b34029f0ec72df6b46399"}, + {file = "websockets-12.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffefa1374cd508d633646d51a8e9277763a9b78ae71324183693959cf94635a7"}, + {file = "websockets-12.0-pp39-pypy39_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba0cab91b3956dfa9f512147860783a1829a8d905ee218a9837c18f683239611"}, + {file = "websockets-12.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:2cb388a5bfb56df4d9a406783b7f9dbefb888c09b71629351cc6b036e9259370"}, + {file = "websockets-12.0-py3-none-any.whl", hash = "sha256:dc284bbc8d7c78a6c69e0c7325ab46ee5e40bb4d50e494d8131a07ef47500e9e"}, + {file = "websockets-12.0.tar.gz", hash = "sha256:81df9cbcbb6c260de1e007e58c011bfebe2dafc8435107b0537f393dd38c8b1b"}, +] + [[package]] name = "werkzeug" version = "3.0.1" description = "The comprehensive WSGI web application library." -category = "main" optional = true python-versions = ">=3.8" files = [ @@ -5758,11 +6708,89 @@ MarkupSafe = ">=2.1.1" [package.extras] watchdog = ["watchdog (>=2.3)"] +[[package]] +name = "wrapt" +version = "1.16.0" +description = "Module for decorators, wrappers and monkey patching." 
+optional = true +python-versions = ">=3.6" +files = [ + {file = "wrapt-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ffa565331890b90056c01db69c0fe634a776f8019c143a5ae265f9c6bc4bd6d4"}, + {file = "wrapt-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e4fdb9275308292e880dcbeb12546df7f3e0f96c6b41197e0cf37d2826359020"}, + {file = "wrapt-1.16.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb2dee3874a500de01c93d5c71415fcaef1d858370d405824783e7a8ef5db440"}, + {file = "wrapt-1.16.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2a88e6010048489cda82b1326889ec075a8c856c2e6a256072b28eaee3ccf487"}, + {file = "wrapt-1.16.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac83a914ebaf589b69f7d0a1277602ff494e21f4c2f743313414378f8f50a4cf"}, + {file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:73aa7d98215d39b8455f103de64391cb79dfcad601701a3aa0dddacf74911d72"}, + {file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:807cc8543a477ab7422f1120a217054f958a66ef7314f76dd9e77d3f02cdccd0"}, + {file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bf5703fdeb350e36885f2875d853ce13172ae281c56e509f4e6eca049bdfb136"}, + {file = "wrapt-1.16.0-cp310-cp310-win32.whl", hash = "sha256:f6b2d0c6703c988d334f297aa5df18c45e97b0af3679bb75059e0e0bd8b1069d"}, + {file = "wrapt-1.16.0-cp310-cp310-win_amd64.whl", hash = "sha256:decbfa2f618fa8ed81c95ee18a387ff973143c656ef800c9f24fb7e9c16054e2"}, + {file = "wrapt-1.16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1a5db485fe2de4403f13fafdc231b0dbae5eca4359232d2efc79025527375b09"}, + {file = "wrapt-1.16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:75ea7d0ee2a15733684badb16de6794894ed9c55aa5e9903260922f0482e687d"}, + {file = "wrapt-1.16.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash 
= "sha256:a452f9ca3e3267cd4d0fcf2edd0d035b1934ac2bd7e0e57ac91ad6b95c0c6389"}, + {file = "wrapt-1.16.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:43aa59eadec7890d9958748db829df269f0368521ba6dc68cc172d5d03ed8060"}, + {file = "wrapt-1.16.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:72554a23c78a8e7aa02abbd699d129eead8b147a23c56e08d08dfc29cfdddca1"}, + {file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d2efee35b4b0a347e0d99d28e884dfd82797852d62fcd7ebdeee26f3ceb72cf3"}, + {file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:6dcfcffe73710be01d90cae08c3e548d90932d37b39ef83969ae135d36ef3956"}, + {file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:eb6e651000a19c96f452c85132811d25e9264d836951022d6e81df2fff38337d"}, + {file = "wrapt-1.16.0-cp311-cp311-win32.whl", hash = "sha256:66027d667efe95cc4fa945af59f92c5a02c6f5bb6012bff9e60542c74c75c362"}, + {file = "wrapt-1.16.0-cp311-cp311-win_amd64.whl", hash = "sha256:aefbc4cb0a54f91af643660a0a150ce2c090d3652cf4052a5397fb2de549cd89"}, + {file = "wrapt-1.16.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5eb404d89131ec9b4f748fa5cfb5346802e5ee8836f57d516576e61f304f3b7b"}, + {file = "wrapt-1.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9090c9e676d5236a6948330e83cb89969f433b1943a558968f659ead07cb3b36"}, + {file = "wrapt-1.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94265b00870aa407bd0cbcfd536f17ecde43b94fb8d228560a1e9d3041462d73"}, + {file = "wrapt-1.16.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f2058f813d4f2b5e3a9eb2eb3faf8f1d99b81c3e51aeda4b168406443e8ba809"}, + {file = "wrapt-1.16.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:98b5e1f498a8ca1858a1cdbffb023bfd954da4e3fa2c0cb5853d40014557248b"}, + {file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:14d7dc606219cdd7405133c713f2c218d4252f2a469003f8c46bb92d5d095d81"}, + {file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:49aac49dc4782cb04f58986e81ea0b4768e4ff197b57324dcbd7699c5dfb40b9"}, + {file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:418abb18146475c310d7a6dc71143d6f7adec5b004ac9ce08dc7a34e2babdc5c"}, + {file = "wrapt-1.16.0-cp312-cp312-win32.whl", hash = "sha256:685f568fa5e627e93f3b52fda002c7ed2fa1800b50ce51f6ed1d572d8ab3e7fc"}, + {file = "wrapt-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:dcdba5c86e368442528f7060039eda390cc4091bfd1dca41e8046af7c910dda8"}, + {file = "wrapt-1.16.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:d462f28826f4657968ae51d2181a074dfe03c200d6131690b7d65d55b0f360f8"}, + {file = "wrapt-1.16.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a33a747400b94b6d6b8a165e4480264a64a78c8a4c734b62136062e9a248dd39"}, + {file = "wrapt-1.16.0-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b3646eefa23daeba62643a58aac816945cadc0afaf21800a1421eeba5f6cfb9c"}, + {file = "wrapt-1.16.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ebf019be5c09d400cf7b024aa52b1f3aeebeff51550d007e92c3c1c4afc2a40"}, + {file = "wrapt-1.16.0-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:0d2691979e93d06a95a26257adb7bfd0c93818e89b1406f5a28f36e0d8c1e1fc"}, + {file = "wrapt-1.16.0-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:1acd723ee2a8826f3d53910255643e33673e1d11db84ce5880675954183ec47e"}, + {file = "wrapt-1.16.0-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:bc57efac2da352a51cc4658878a68d2b1b67dbe9d33c36cb826ca449d80a8465"}, + {file = "wrapt-1.16.0-cp36-cp36m-win32.whl", hash = 
"sha256:da4813f751142436b075ed7aa012a8778aa43a99f7b36afe9b742d3ed8bdc95e"}, + {file = "wrapt-1.16.0-cp36-cp36m-win_amd64.whl", hash = "sha256:6f6eac2360f2d543cc875a0e5efd413b6cbd483cb3ad7ebf888884a6e0d2e966"}, + {file = "wrapt-1.16.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:a0ea261ce52b5952bf669684a251a66df239ec6d441ccb59ec7afa882265d593"}, + {file = "wrapt-1.16.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7bd2d7ff69a2cac767fbf7a2b206add2e9a210e57947dd7ce03e25d03d2de292"}, + {file = "wrapt-1.16.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9159485323798c8dc530a224bd3ffcf76659319ccc7bbd52e01e73bd0241a0c5"}, + {file = "wrapt-1.16.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a86373cf37cd7764f2201b76496aba58a52e76dedfaa698ef9e9688bfd9e41cf"}, + {file = "wrapt-1.16.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:73870c364c11f03ed072dda68ff7aea6d2a3a5c3fe250d917a429c7432e15228"}, + {file = "wrapt-1.16.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:b935ae30c6e7400022b50f8d359c03ed233d45b725cfdd299462f41ee5ffba6f"}, + {file = "wrapt-1.16.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:db98ad84a55eb09b3c32a96c576476777e87c520a34e2519d3e59c44710c002c"}, + {file = "wrapt-1.16.0-cp37-cp37m-win32.whl", hash = "sha256:9153ed35fc5e4fa3b2fe97bddaa7cbec0ed22412b85bcdaf54aeba92ea37428c"}, + {file = "wrapt-1.16.0-cp37-cp37m-win_amd64.whl", hash = "sha256:66dfbaa7cfa3eb707bbfcd46dab2bc6207b005cbc9caa2199bcbc81d95071a00"}, + {file = "wrapt-1.16.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1dd50a2696ff89f57bd8847647a1c363b687d3d796dc30d4dd4a9d1689a706f0"}, + {file = "wrapt-1.16.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:44a2754372e32ab315734c6c73b24351d06e77ffff6ae27d2ecf14cf3d229202"}, + {file = "wrapt-1.16.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:8e9723528b9f787dc59168369e42ae1c3b0d3fadb2f1a71de14531d321ee05b0"}, + {file = "wrapt-1.16.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dbed418ba5c3dce92619656802cc5355cb679e58d0d89b50f116e4a9d5a9603e"}, + {file = "wrapt-1.16.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:941988b89b4fd6b41c3f0bfb20e92bd23746579736b7343283297c4c8cbae68f"}, + {file = "wrapt-1.16.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6a42cd0cfa8ffc1915aef79cb4284f6383d8a3e9dcca70c445dcfdd639d51267"}, + {file = "wrapt-1.16.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:1ca9b6085e4f866bd584fb135a041bfc32cab916e69f714a7d1d397f8c4891ca"}, + {file = "wrapt-1.16.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d5e49454f19ef621089e204f862388d29e6e8d8b162efce05208913dde5b9ad6"}, + {file = "wrapt-1.16.0-cp38-cp38-win32.whl", hash = "sha256:c31f72b1b6624c9d863fc095da460802f43a7c6868c5dda140f51da24fd47d7b"}, + {file = "wrapt-1.16.0-cp38-cp38-win_amd64.whl", hash = "sha256:490b0ee15c1a55be9c1bd8609b8cecd60e325f0575fc98f50058eae366e01f41"}, + {file = "wrapt-1.16.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9b201ae332c3637a42f02d1045e1d0cccfdc41f1f2f801dafbaa7e9b4797bfc2"}, + {file = "wrapt-1.16.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2076fad65c6736184e77d7d4729b63a6d1ae0b70da4868adeec40989858eb3fb"}, + {file = "wrapt-1.16.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5cd603b575ebceca7da5a3a251e69561bec509e0b46e4993e1cac402b7247b8"}, + {file = "wrapt-1.16.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b47cfad9e9bbbed2339081f4e346c93ecd7ab504299403320bf85f7f85c7d46c"}, + {file = "wrapt-1.16.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:f8212564d49c50eb4565e502814f694e240c55551a5f1bc841d4fcaabb0a9b8a"}, + {file = "wrapt-1.16.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:5f15814a33e42b04e3de432e573aa557f9f0f56458745c2074952f564c50e664"}, + {file = "wrapt-1.16.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:db2e408d983b0e61e238cf579c09ef7020560441906ca990fe8412153e3b291f"}, + {file = "wrapt-1.16.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:edfad1d29c73f9b863ebe7082ae9321374ccb10879eeabc84ba3b69f2579d537"}, + {file = "wrapt-1.16.0-cp39-cp39-win32.whl", hash = "sha256:ed867c42c268f876097248e05b6117a65bcd1e63b779e916fe2e33cd6fd0d3c3"}, + {file = "wrapt-1.16.0-cp39-cp39-win_amd64.whl", hash = "sha256:eb1b046be06b0fce7249f1d025cd359b4b80fc1c3e24ad9eca33e0dcdb2e4a35"}, + {file = "wrapt-1.16.0-py3-none-any.whl", hash = "sha256:6906c4100a8fcbf2fa735f6059214bb13b97f75b1a61777fcf6432121ef12ef1"}, + {file = "wrapt-1.16.0.tar.gz", hash = "sha256:5f370f952971e7d17c7d1ead40e49f32345a7f7a5373571ef44d800d06b1899d"}, +] + [[package]] name = "yarl" version = "1.9.2" description = "Yet another URL library" -category = "main" optional = true python-versions = ">=3.7" files = [ @@ -5850,7 +6878,6 @@ multidict = ">=4.0" name = "yfinance" version = "0.2.31" description = "Download market data from Yahoo! 
Finance API" -category = "main" optional = true python-versions = "*" files = [ @@ -5875,7 +6902,6 @@ requests = ">=2.31" name = "zipp" version = "3.17.0" description = "Backport of pathlib-compatible object wrapper for zip files" -category = "main" optional = false python-versions = ">=3.8" files = [ @@ -5888,7 +6914,8 @@ docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.link testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy (>=0.9.1)", "pytest-ruff"] [extras] -connectors = ["psycopg2", "pymysql", "snowflake-sqlalchemy", "sqlalchemy-databricks"] +chromadb = ["chromadb"] +connectors = ["psycopg2", "pymysql", "snowflake-sqlalchemy", "sqlalchemy-bigquery", "sqlalchemy-databricks"] excel = ["openpyxl"] ggplot = ["ggplot"] google-ai = ["google-cloud-aiplatform", "google-generativeai"] @@ -5907,5 +6934,5 @@ yfinance = ["yfinance"] [metadata] lock-version = "2.0" -python-versions = ">=3.9,<3.9.7 || >3.9.7,<4.0" -content-hash = "976a14fa71fc03bc950fd3024702185dceabd1e80bf36d2d18ed7d80824be881" +python-versions = ">=3.9,<3.9.7 || >3.9.7,<3.13" +content-hash = "7bce70456b621a6d1db50a41bd1f311f4424b0cc24484a9b62b9c93fbda41758" diff --git a/pyproject.toml b/pyproject.toml index 4b033436d..e0857267d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "pandasai" -version = "1.5.21" +version = "2.0" description = "PandasAI is a Python library that integrates generative artificial intelligence capabilities into Pandas, making dataframes conversational." 
authors = ["Gabriele Venturi"] license = "MIT" @@ -21,6 +21,7 @@ duckdb = "^0.9.2" faker = "^19.12.0" pillow = "^10.1.0" requests = "^2.31.0" +jinja2 = "^3.1.3" modin = {version = "0.18.1", optional = true, extras=["ray"]} beautifulsoup4 = {version="^4.12.2", optional = true} google-generativeai = {version = "^0.3.2", optional = true} @@ -44,7 +45,8 @@ yfinance = { version = "^0.2.28", optional = true } sqlalchemy-databricks = { version = "^0.2.0", optional = true } snowflake-sqlalchemy = { version = "^1.5.0", optional = true } flask = { version = "^3.0.2", optional = true } - +sqlalchemy-bigquery = { version = "^1.8.0", optional = true } +chromadb = {version = "^0.4.22", optional = true} [tool.poetry.group.dev] optional = true @@ -63,7 +65,7 @@ sourcery = "^1.11.0" [tool.poetry.extras] -connectors = [ "pymysql", "psycopg2", "sqlalchemy-databricks", "snowflake-sqlalchemy"] +connectors = [ "pymysql", "psycopg2", "sqlalchemy-databricks", "sqlalchemy-bigquery", "snowflake-sqlalchemy"] google-ai = ["google-generativeai", "google-cloud-aiplatform"] google-sheets = ["beautifulsoup4"] excel = ["openpyxl"] @@ -79,6 +81,7 @@ streamlit = ["streamlit"] text-generation = ["fsspec", "huggingface-hub", "text-generation"] yfinance = ["yfinance"] modin = ["modin", "ray"] +chromadb = ["chromadb"] [tool.poetry.group.docs] optional = true diff --git a/tests/integration_tests/test_cricket.py b/tests/integration_tests/test_cricket.py index 0388ba041..a2c6ab695 100644 --- a/tests/integration_tests/test_cricket.py +++ b/tests/integration_tests/test_cricket.py @@ -1,6 +1,8 @@ import unittest -from pandasai import SmartDataframe +import pandas as pd + +from pandasai.agent.base import Agent from pandasai.llm import OpenAI from . import PATH_DATA @@ -10,7 +12,13 @@ class TestCricket(unittest.TestCase): def setUp(self) -> None: # export OPENAI_API_KEY='sk-...' 
llm = OpenAI(temperature=0) - self.df = SmartDataframe(f"{PATH_DATA}/cricket data.csv", config={"llm": llm}) + + csv_file_path = f"{PATH_DATA}/cricket data.csv" + + # Read the CSV file into a DataFrame + df = pd.read_csv(csv_file_path) + + self.df = Agent([df], config={"llm": llm}) def test_number_response(self): response = self.df.chat( diff --git a/tests/integration_tests/test_gin.py b/tests/integration_tests/test_gin.py index 765573773..89259dbb3 100644 --- a/tests/integration_tests/test_gin.py +++ b/tests/integration_tests/test_gin.py @@ -1,6 +1,8 @@ import unittest -from pandasai import SmartDataframe +import pandas as pd + +from pandasai.agent.base import Agent from pandasai.llm import OpenAI from . import PATH_DATA @@ -10,7 +12,13 @@ class TestGin(unittest.TestCase): def setUp(self) -> None: # export OPENAI_API_KEY='sk-...' llm = OpenAI(temperature=0) - self.df = SmartDataframe(f"{PATH_DATA}/Gins_List.csv", config={"llm": llm}) + + csv_file_path = f"{PATH_DATA}/Gins_List.csv" + + # Read the CSV file into a DataFrame + df = pd.read_csv(csv_file_path) + + self.df = Agent([df], config={"llm": llm}) def test_number_response(self): response = self.df.chat("Average price of Gin rounded off", "number") diff --git a/tests/integration_tests/test_loan_payments.py b/tests/integration_tests/test_loan_payments.py index 3d214b81d..d14e08e50 100644 --- a/tests/integration_tests/test_loan_payments.py +++ b/tests/integration_tests/test_loan_payments.py @@ -1,6 +1,8 @@ import unittest -from pandasai import SmartDataframe +import pandas as pd + +from pandasai.agent.base import Agent from pandasai.llm import OpenAI @@ -8,9 +10,13 @@ class TestLoanPayments(unittest.TestCase): def setUp(self) -> None: # export OPENAI_API_KEY='sk-...' 
llm = OpenAI(temperature=0) - self.df = SmartDataframe( - "examples/data/Loan payments data.csv", config={"llm": llm} - ) + + csv_file_path = "examples/data/Loan payments data.csv" + + # Read the CSV file into a DataFrame + df = pd.read_csv(csv_file_path) + + self.df = Agent([df], config={"llm": llm}) def test_number_response(self): response = self.df.chat( diff --git a/tests/integration_tests/test_new_york_housing_market.py b/tests/integration_tests/test_new_york_housing_market.py index a8a187c16..d02ca935d 100644 --- a/tests/integration_tests/test_new_york_housing_market.py +++ b/tests/integration_tests/test_new_york_housing_market.py @@ -1,6 +1,8 @@ import unittest -from pandasai import SmartDataframe +import pandas as pd + +from pandasai.agent.base import Agent from pandasai.llm import OpenAI from . import PATH_DATA @@ -10,9 +12,13 @@ class TestNewYorkHousing(unittest.TestCase): def setUp(self) -> None: # export OPENAI_API_KEY='sk-...' llm = OpenAI(temperature=0) - self.df = SmartDataframe( - f"{PATH_DATA}/NY-House-Dataset.csv", config={"llm": llm} - ) + + csv_file_path = f"{PATH_DATA}/NY-House-Dataset.csv" + + # Read the CSV file into a DataFrame + df = pd.read_csv(csv_file_path) + + self.df = Agent([df], config={"llm": llm}) def test_number_response(self): response = self.df.chat("Average price of Condo for sale", "number") diff --git a/tests/integration_tests/test_spotify.py b/tests/integration_tests/test_spotify.py index e99c81f57..66b6ded4a 100644 --- a/tests/integration_tests/test_spotify.py +++ b/tests/integration_tests/test_spotify.py @@ -1,6 +1,8 @@ import unittest -from pandasai import SmartDataframe +import pandas as pd + +from pandasai.agent import Agent from pandasai.llm import OpenAI from . import PATH_DATA @@ -10,7 +12,12 @@ class TestSpotify(unittest.TestCase): def setUp(self) -> None: # export OPENAI_API_KEY='sk-...' 
llm = OpenAI(temperature=0) - self.df = SmartDataframe(f"{PATH_DATA}/artists.csv", config={"llm": llm}) + csv_file_path = f"{PATH_DATA}/artists.csv" + + # Read the CSV file into a DataFrame + df = pd.read_csv(csv_file_path) + + self.df = Agent([df], config={"llm": llm}) def test_number_response(self): response = self.df.chat("streams of Imagine Dragons", "number") diff --git a/tests/unit_tests/test_agent.py b/tests/unit_tests/_tests_agent.py similarity index 100% rename from tests/unit_tests/test_agent.py rename to tests/unit_tests/_tests_agent.py diff --git a/tests/unit_tests/agent/test_agent.py b/tests/unit_tests/agent/test_agent.py new file mode 100644 index 000000000..4cf951057 --- /dev/null +++ b/tests/unit_tests/agent/test_agent.py @@ -0,0 +1,561 @@ +import os +import sys +from typing import Optional +from unittest.mock import MagicMock, Mock, patch + +import pandas as pd +import pytest +from langchain import OpenAI + +from pandasai.agent import Agent +from pandasai.connectors.sql import ( + PostgreSQLConnector, + SQLConnector, + SQLConnectorConfig, +) +from pandasai.constants import DEFAULT_FILE_PERMISSIONS +from pandasai.helpers.code_manager import CodeManager +from pandasai.helpers.dataframe_serializer import DataframeSerializerType +from pandasai.llm.fake import FakeLLM +from pandasai.llm.langchain import LangchainLLM +from pandasai.prompts.clarification_questions_prompt import ClarificationQuestionPrompt +from pandasai.prompts.explain_prompt import ExplainPrompt + + +class TestAgent: + "Unit tests for Agent class" + + @pytest.fixture + def sample_df(self): + return pd.DataFrame( + { + "country": [ + "United States", + "United Kingdom", + "France", + "Germany", + "Italy", + "Spain", + "Canada", + "Australia", + "Japan", + "China", + ], + "gdp": [ + 19294482071552, + 2891615567872, + 2411255037952, + 3435817336832, + 1745433788416, + 1181205135360, + 1607402389504, + 1490967855104, + 4380756541440, + 14631844184064, + ], + "happiness_index": [ + 6.94, + 
7.16, + 6.66, + 7.07, + 6.38, + 6.4, + 7.23, + 7.22, + 5.87, + 5.12, + ], + } + ) + + @pytest.fixture + def llm(self, output: Optional[str] = None) -> FakeLLM: + return FakeLLM(output=output) + + @pytest.fixture + def config(self, llm: FakeLLM) -> dict: + return {"llm": llm, "dataframe_serializer": DataframeSerializerType.CSV} + + @pytest.fixture + def agent(self, sample_df: pd.DataFrame, config: dict) -> Agent: + return Agent(sample_df, config, vectorstore=MagicMock()) + + @pytest.fixture + @patch("pandasai.connectors.sql.create_engine", autospec=True) + def sql_connector(self, create_engine): + # Define your ConnectorConfig instance here + self.config = SQLConnectorConfig( + dialect="mysql", + driver="pymysql", + username="your_username", + password="your_password", + host="your_host", + port=443, + database="your_database", + table="your_table", + where=[["column_name", "=", "value"]], + ).dict() + + # Create an instance of SQLConnector + return SQLConnector(self.config) + + @pytest.fixture + @patch("pandasai.connectors.sql.create_engine", autospec=True) + def pgsql_connector(self, create_engine): + # Define your ConnectorConfig instance here + self.config = SQLConnectorConfig( + dialect="mysql", + driver="pymysql", + username="your_username", + password="your_password", + host="your_host", + port=443, + database="your_database", + table="your_table", + where=[["column_name", "=", "value"]], + ).dict() + + # Create an instance of SQLConnector + return PostgreSQLConnector(self.config) + + def test_constructor(self, sample_df, config): + agent_1 = Agent(sample_df, config) + agent_2 = Agent([sample_df], config) + + # test multiple agents instances data overlap + agent_1.context.memory.add("Which country has the highest gdp?", True) + memory = agent_1.context.memory.all() + assert len(memory) == 1 + + memory = agent_2.context.memory.all() + assert len(memory) == 0 + + def test_chat(self, sample_df, config): + # Create an Agent instance for testing + agent = 
Agent(sample_df, config) + agent.chat = Mock() + agent.chat.return_value = "United States has the highest gdp" + # Test the chat function + response = agent.chat("Which country has the highest gdp?") + assert agent.chat.called + assert isinstance(response, str) + assert response == "United States has the highest gdp" + + def test_start_new_conversation(self, sample_df, config): + agent = Agent(sample_df, config, memory_size=10) + agent.context.memory.add("Which country has the highest gdp?", True) + memory = agent.context.memory.all() + assert len(memory) == 1 + agent.start_new_conversation() + memory = agent.context.memory.all() + assert len(memory) == 0 + + def test_clarification_questions(self, sample_df, config): + agent = Agent(sample_df, config, memory_size=10) + agent.context.config.llm.call = Mock() + clarification_response = ( + '["What is happiest index for you?", "What is unit of measure for gdp?"]' + ) + agent.context.config.llm.call.return_value = clarification_response + + questions = agent.clarification_questions("What is the happiest country?") + assert len(questions) == 2 + assert questions[0] == "What is happiest index for you?" + assert questions[1] == "What is unit of measure for gdp?" 
+ + def test_clarification_questions_failure(self, sample_df, config): + agent = Agent(sample_df, config, memory_size=10) + agent.context.config.llm.call = Mock() + + agent.context.config.llm.call.return_value = Exception( + "This is a mock exception" + ) + + with pytest.raises(Exception): + agent.clarification_questions("What is the happiest country?") + + def test_clarification_questions_fail_non_json(self, sample_df, config): + agent = Agent(sample_df, config, memory_size=10) + agent.context.config.llm.call = Mock() + + agent.context.config.llm.call.return_value = "This is not json response" + + with pytest.raises(Exception): + agent.clarification_questions("What is the happiest country?") + + def test_clarification_questions_max_3(self, sample_df, config): + agent = Agent(sample_df, config, memory_size=10) + agent.context.config.llm.call = Mock() + clarification_response = ( + '["What is happiest index for you", ' + '"What is unit of measure for gdp", ' + '"How many countries are involved in the survey", ' + '"How do you want this data to be represented"]' + ) + agent.context.config.llm.call.return_value = clarification_response + + questions = agent.clarification_questions("What is the happiest country?") + + assert isinstance(questions, list) + assert len(questions) == 3 + + def test_explain(self, agent: Agent): + agent.context.config.llm.call = Mock() + clarification_response = """ +Combine the Data: To find out who gets paid the most, +I needed to match the names of people with the amounts of money they earn. +It's like making sure the right names are next to the right amounts. +I used a method to do this, like connecting pieces of a puzzle. +Find the Top Earner: After combining the data, I looked through it to find +the person with the most money. 
+It's like finding the person who has the most marbles in a game + """ + agent.context.config.llm.call.return_value = clarification_response + + response = agent.explain() + + assert response == ( + """ +Combine the Data: To find out who gets paid the most, +I needed to match the names of people with the amounts of money they earn. +It's like making sure the right names are next to the right amounts. +I used a method to do this, like connecting pieces of a puzzle. +Find the Top Earner: After combining the data, I looked through it to find +the person with the most money. +It's like finding the person who has the most marbles in a game + """ + ) + + def test_call_prompt_success(self, agent: Agent): + agent.context.config.llm.call = Mock() + clarification_response = """ +What is expected Salary Increase? + """ + agent.context.config.llm.call.return_value = clarification_response + prompt = ExplainPrompt( + context=agent.context, + code="test code", + ) + agent.call_llm_with_prompt(prompt) + assert agent.context.config.llm.call.call_count == 1 + + def test_call_prompt_max_retries_exceeds(self, agent: Agent): + # raises exception every time + agent.context.config.llm.call = Mock() + agent.context.config.llm.call.side_effect = Exception("Raise an exception") + with pytest.raises(Exception): + agent.call_llm_with_prompt("Test Prompt") + + assert agent.context.config.llm.call.call_count == 3 + + def test_call_prompt_max_retry_on_error(self, agent: Agent): + # test the LLM call failed twice but succeed third time + agent.context.config.llm.call = Mock() + agent.context.config.llm.call.side_effect = [ + Exception(), + Exception(), + "LLM Result", + ] + prompt = ExplainPrompt( + context=agent.context, + code="test code", + ) + result = agent.call_llm_with_prompt(prompt) + assert result == "LLM Result" + assert agent.context.config.llm.call.call_count == 3 + + def test_call_prompt_max_retry_twice(self, agent: Agent): + # test the LLM call failed once but succeed second time + 
agent.context.config.llm.call = Mock() + agent.context.config.llm.call.side_effect = [Exception(), "LLM Result"] + prompt = ExplainPrompt( + context=agent.context, + code="test code", + ) + result = agent.call_llm_with_prompt(prompt) + + assert result == "LLM Result" + assert agent.context.config.llm.call.call_count == 2 + + def test_call_llm_with_prompt_no_retry_on_error(self, agent: Agent): + # Test when LLM call raises an exception but retries are disabled + + agent.context.config.use_error_correction_framework = False + agent.context.config.llm.call = Mock() + agent.context.config.llm.call.side_effect = Exception() + with pytest.raises(Exception): + agent.call_llm_with_prompt("Test Prompt") + + assert agent.context.config.llm.call.call_count == 1 + + def test_call_llm_with_prompt_max_retries_check(self, agent: Agent): + # Test when LLM call raises an exception, but called call function + # 'max_retries' time + + agent.context.config.max_retries = 5 + agent.context.config.llm.call = Mock() + agent.context.config.llm.call.side_effect = Exception() + + with pytest.raises(Exception): + agent.call_llm_with_prompt("Test Prompt") + + assert agent.context.config.llm.call.call_count == 5 + + def test_clarification_prompt_validate_output_false_case(self, agent: Agent): + # Test whether the output is json or not + agent.context.config.llm.call = Mock() + agent.context.config.llm.call.return_value = "This is not json" + + prompt = ClarificationQuestionPrompt( + context=agent.context, + query="test query", + ) + with pytest.raises(Exception): + agent.call_llm_with_prompt(prompt) + + def test_clarification_prompt_validate_output_true_case(self, agent: Agent): + # Test whether the output is json or not + agent.context.config.llm.call = Mock() + agent.context.config.llm.call.return_value = '["This is test question"]' + + prompt = ClarificationQuestionPrompt( + context=agent.context, + query="test query", + ) + result = agent.call_llm_with_prompt(prompt) + # Didn't raise any 
exception + assert isinstance(result, str) + + def test_rephrase(self, sample_df, config): + agent = Agent(sample_df, config, memory_size=10) + agent.context.config.llm.call = Mock() + clarification_response = """ +How much has the total salary expense increased? + """ + agent.context.config.llm.call.return_value = clarification_response + + response = agent.rephrase_query("how much has the revenue increased?") + + assert response == ( + """ +How much has the total salary expense increased? + """ + ) + + def test_load_llm_with_pandasai_llm(self, agent: Agent, llm): + assert agent.get_llm(llm) == llm + + def test_load_llm_with_langchain_llm(self, agent: Agent, llm): + langchain_llm = OpenAI(openai_api_key="fake_key") + + llm = agent.get_llm(langchain_llm) + assert isinstance(llm, LangchainLLM) + assert llm.langchain_llm == langchain_llm + + @patch.object( + CodeManager, + "execute_code", + return_value={ + "type": "string", + "value": "There are 10 countries in the dataframe.", + }, + ) + def test_last_result_is_saved(self, _mocked_method, agent: Agent): + assert agent.last_result is None + + _mocked_method.__name__ = "execute_code" + + agent.chat("How many countries are in the dataframe?") + assert agent.last_result == { + "type": "string", + "value": "There are 10 countries in the dataframe.", + } + + @patch.object( + CodeManager, + "execute_code", + return_value={ + "type": "string", + "value": "There are 10 countries in the dataframe.", + }, + ) + @patch("pandasai.helpers.query_exec_tracker.QueryExecTracker.publish") + def test_query_tracker_publish_called_in_chat_method( + self, mock_query_tracker_publish, _mocked_method, agent: Agent + ): + assert agent.last_result is None + + _mocked_method.__name__ = "execute_code" + + agent.chat("How many countries are in the dataframe?") + mock_query_tracker_publish.assert_called() + + @patch( + "pandasai.pipelines.chat.code_execution.CodeManager.execute_code", + autospec=True, + ) + @patch( + 
"pandasai.pipelines.chat.code_generator.CodeGenerator.execute", + autospec=True, + ) + @patch( + "pandasai.pipelines.chat.code_execution.traceback.format_exc", + autospec=True, + ) + def test_retry_on_error_with_single_df( + self, + mock_traceback, + mock_generate, + mock_execute, + agent: Agent, + ): + mock_traceback.return_value = "Test error" + mock_generate.return_value = ( + "result = {'type': 'string', 'value': 'Hello World'}" + ) + mock_execute.side_effect = [ + Exception("Test error"), + {"type": "string", "value": "Hello World"}, + ] + + agent.context.dfs[0].to_csv = Mock( + return_value="""country,gdp,happiness_index +China,654881226,6.66 +Japan,9009692259,7.16 +Spain,8446903488,6.38 +""" + ) + + agent.chat("Hello world") + + last_prompt = agent.last_prompt + if sys.platform.startswith("win"): + last_prompt = last_prompt.replace("\r\n", "\n") + + print(last_prompt) + + assert ( + last_prompt + == """ +dfs[0]:10x3 +country,gdp,happiness_index +China,654881226,6.66 +Japan,9009692259,7.16 +Spain,8446903488,6.38 + + +The user asked the following question: +### QUERY + Hello world + +You generated this python code: +result = {'type': 'string', 'value': 'Hello World'} + +It fails with the following error: +Test error + +Fix the python code above and return the new python code:""" # noqa: E501 + ) + + @patch("os.makedirs") + def test_load_config_with_cache(self, mock_makedirs, agent): + # Modify the agent's configuration + agent.context.config.save_charts = True + agent.context.config.enable_cache = True + + # Call the initialize method + agent.configure() + + # Assertions for enabling cache + cache_dir = os.path.join(os.getcwd(), "cache") + mock_makedirs.assert_any_call( + cache_dir, mode=DEFAULT_FILE_PERMISSIONS, exist_ok=True + ) + + # Assertions for saving charts + charts_dir = os.path.join(os.getcwd(), agent.context.config.save_charts_path) + mock_makedirs.assert_any_call( + charts_dir, mode=DEFAULT_FILE_PERMISSIONS, exist_ok=True + ) + + 
@patch("os.makedirs") + def test_load_config_without_cache(self, mock_makedirs, agent): + # Modify the agent's configuration + agent.context.config.save_charts = True + agent.context.config.enable_cache = False + + # Call the initialize method + agent.configure() + + # Assertions for saving charts + charts_dir = os.path.join(os.getcwd(), agent.context.config.save_charts_path) + mock_makedirs.assert_called_once_with( + charts_dir, mode=DEFAULT_FILE_PERMISSIONS, exist_ok=True + ) + + def test_validate_true_direct_sql_with_non_connector(self, llm, sample_df): + # raise exception with non connector + Agent( + [sample_df], + config={"llm": llm, "enable_cache": False, "direct_sql": True}, + ) + + def test_validate_direct_sql_with_connector(self, llm, sql_connector): + # not exception is raised using single connector + Agent( + [sql_connector], + config={"llm": llm, "enable_cache": False, "direct_sql": True}, + ) + + def test_validate_false_direct_sql_with_connector(self, llm, sql_connector): + # not exception is raised using single connector + Agent( + [sql_connector], + config={"llm": llm, "enable_cache": False, "direct_sql": False}, + ) + + def test_validate_false_direct_sql_with_two_different_connector( + self, llm, sql_connector, pgsql_connector + ): + # not exception is raised using single connector + Agent( + [sql_connector, pgsql_connector], + config={"llm": llm, "enable_cache": False, "direct_sql": False}, + ) + + def test_train_method_with_qa(self, agent): + queries = ["query1", "query2"] + codes = ["code1", "code2"] + agent.train(queries, codes) + + agent._vectorstore.add_docs.assert_not_called() + agent._vectorstore.add_question_answer.assert_called_once_with(queries, codes) + + def test_train_method_with_docs(self, agent): + docs = ["doc1"] + agent.train(docs=docs) + + agent._vectorstore.add_question_answer.assert_not_called() + agent._vectorstore.add_docs.assert_called_once() + agent._vectorstore.add_docs.assert_called_once_with(docs) + + def 
test_train_method_with_docs_and_qa(self, agent): + docs = ["doc1"] + queries = ["query1", "query2"] + codes = ["code1", "code2"] + agent.train(queries, codes, docs=docs) + + agent._vectorstore.add_question_answer.assert_called_once() + agent._vectorstore.add_question_answer.assert_called_once_with(queries, codes) + agent._vectorstore.add_docs.assert_called_once() + agent._vectorstore.add_docs.assert_called_once_with(docs) + + def test_train_method_with_queries_but_no_code(self, agent): + queries = ["query1", "query2"] + with pytest.raises(ValueError): + agent.train(queries) + + def test_train_method_with_code_but_no_queries(self, agent): + codes = ["code1", "code2"] + with pytest.raises(ValueError): + agent.train(codes) diff --git a/tests/unit_tests/connectors/test_airtable.py b/tests/unit_tests/connectors/test_airtable.py index 25b91b366..c335c8bf4 100644 --- a/tests/unit_tests/connectors/test_airtable.py +++ b/tests/unit_tests/connectors/test_airtable.py @@ -5,18 +5,19 @@ import pandas as pd from pandasai.connectors import AirtableConnector -from pandasai.connectors.base import AirtableConnectorConfig +from pandasai.connectors.airtable import AirtableConnectorConfig class TestAirTableConnector(unittest.TestCase): def setUp(self) -> None: # Define your ConnectorConfig instance here self.config = AirtableConnectorConfig( - token="your_token", base_id="your_baseid", table="your_table_name", + database="abc", + api_key="you_key", where=[["Status", "=", "In progress"]], - ).dict() + ) self.root_url = "https://api.airtable.com/v0/" self.expected_data_json = """ { @@ -53,12 +54,12 @@ def setUp(self) -> None: self.connector = AirtableConnector(config=self.config) def test_constructor_and_properties(self): - self.assertEqual(self.connector._config, self.config) + self.assertEqual(self.connector.config, self.config) self.assertEqual(self.connector._root_url, self.root_url) self.assertEqual(self.connector._cache_interval, 600) def test_fallback_name(self): - 
self.assertEqual(self.connector.fallback_name, self.config["table"]) + self.assertEqual(self.connector.fallback_name, self.config.table) @patch("requests.get") def test_execute(self, mock_request_get): @@ -83,7 +84,7 @@ def test_head(self, mock_request_get): def test_fallback_name_property(self): # Test fallback_name property fallback_name = self.connector.fallback_name - self.assertEqual(fallback_name, self.config["table"]) + self.assertEqual(fallback_name, self.config.table) @patch("requests.get") def test_rows_count_property(self, mock_request_get): diff --git a/tests/unit_tests/connectors/test_base.py b/tests/unit_tests/connectors/test_base.py index 4cda34888..7f58ac909 100644 --- a/tests/unit_tests/connectors/test_base.py +++ b/tests/unit_tests/connectors/test_base.py @@ -21,7 +21,7 @@ def _load_connector_config(self, config: BaseConnectorConfig): def _init_connection(self, config: BaseConnectorConfig): pass - def head(self): + def head(self, n: int = 5): pass def execute(self): @@ -63,7 +63,7 @@ def mock_connector(mock_config): def test_base_connector_initialization(mock_config, mock_connector): - assert mock_connector._config == mock_config + assert mock_connector.config == mock_config def test_base_connector_path_property(mock_connector): diff --git a/tests/unit_tests/connectors/test_databricks.py b/tests/unit_tests/connectors/test_databricks.py index 0a1ed52ad..fa235e98c 100644 --- a/tests/unit_tests/connectors/test_databricks.py +++ b/tests/unit_tests/connectors/test_databricks.py @@ -3,12 +3,12 @@ import pandas as pd -from pandasai.connectors import DatabricksConnector -from pandasai.connectors.base import DatabricksConnectorConfig +from pandasai.ee.connectors import DatabricksConnector +from pandasai.ee.connectors.databricks import DatabricksConnectorConfig class TestDataBricksConnector(unittest.TestCase): - @patch("pandasai.connectors.databricks.create_engine", autospec=True) + @patch("pandasai.ee.connectors.databricks.create_engine", autospec=True) # 
@patch("pandasai.connectors.sql.sql", autospec=True) def setUp(self, mock_create_engine): # Create a mock engine and connection @@ -32,15 +32,15 @@ def setUp(self, mock_create_engine): # Create an instance of SQLConnector self.connector = DatabricksConnector(self.config) - @patch("pandasai.connectors.DatabricksConnector._load_connector_config") - @patch("pandasai.connectors.DatabricksConnector._init_connection") + @patch("pandasai.ee.connectors.DatabricksConnector._load_connector_config") + @patch("pandasai.ee.connectors.DatabricksConnector._init_connection") def test_constructor_and_properties( self, mock_load_connector_config, mock_init_connection ): # Test constructor and properties mock_load_connector_config.return_value = self.config - self.assertEqual(self.connector._config, self.config) + self.assertEqual(self.connector.config, self.config) self.assertEqual(self.connector._engine, self.mock_engine) self.assertEqual(self.connector._connection, self.mock_connection) self.assertEqual(self.connector._cache_interval, 600) diff --git a/tests/unit_tests/connectors/test_google_big_query.py b/tests/unit_tests/connectors/test_google_big_query.py new file mode 100644 index 000000000..74547d3d5 --- /dev/null +++ b/tests/unit_tests/connectors/test_google_big_query.py @@ -0,0 +1,90 @@ +import unittest +from unittest.mock import Mock, patch + +import pandas as pd + +from pandasai.ee.connectors import GoogleBigQueryConnector +from pandasai.ee.connectors.google_big_query import GoogleBigQueryConnectorConfig + + +class TestGoogleBigQueryConnector(unittest.TestCase): + @patch("pandasai.ee.connectors.google_big_query.create_engine", autospec=True) + def setUp(self, mock_create_engine) -> None: + self.mock_engine = Mock() + self.mock_connection = Mock() + self.mock_engine.connect.return_value = self.mock_connection + mock_create_engine.return_value = self.mock_engine + + self.config = GoogleBigQueryConnectorConfig( + dialect="bigquery", + database="database", + 
table="yourtable", + credentials_path="keyfile.json", + projectID="project_id", + ).dict() + + self.connector = GoogleBigQueryConnector(self.config) + + @patch("pandasai.ee.connectors.GoogleBigQueryConnector._load_connector_config") + @patch("pandasai.ee.connectors.GoogleBigQueryConnector._init_connection") + def test_constructor_and_properties( + self, mock_load_connector_config, mock_init_connection + ): + # Test constructor and properties + self.assertEqual(self.connector.config, self.config) + self.assertEqual(self.connector._engine, self.mock_engine) + self.assertEqual(self.connector._connection, self.mock_connection) + self.assertEqual(self.connector._cache_interval, 600) + GoogleBigQueryConnector(self.config) + mock_load_connector_config.assert_called() + mock_init_connection.assert_called() + + def test_repr_method(self): + # Test __repr__ method + expected_repr = ( + "" + ) + self.assertEqual(repr(self.connector), expected_repr) + + @patch("pandasai.connectors.sql.pd.read_sql", autospec=True) + def test_head_method(self, mock_read_sql): + expected_data = pd.DataFrame({"Column1": [1, 2, 3], "Column2": [4, 5, 6]}) + mock_read_sql.return_value = expected_data + head_data = self.connector.head() + pd.testing.assert_frame_equal(head_data, expected_data) + + def test_rows_count_property(self): + # Test rows_count property + self.connector._rows_count = None + self.mock_connection.execute.return_value.fetchone.return_value = ( + 50, + ) # Sample rows count + rows_count = self.connector.rows_count + self.assertEqual(rows_count, 50) + + def test_columns_count_property(self): + # Test columns_count property + self.connector._columns_count = None + mock_df = Mock() + mock_df.columns = ["Column1", "Column2"] + self.connector.head = Mock(return_value=mock_df) + columns_count = self.connector.columns_count + self.assertEqual(columns_count, 2) + + def test_column_hash_property(self): + # Test column_hash property + mock_df = Mock() + mock_df.columns = ["Column1", 
"Column2"] + self.connector.head = Mock(return_value=mock_df) + column_hash = self.connector.column_hash + self.assertIsNotNone(column_hash) + self.assertEqual( + column_hash, + "0d045cff164deef81e24b0ed165b7c9c2789789f013902115316cde9d214fe63", + ) + + def test_fallback_name_property(self): + # Test fallback_name property + fallback_name = self.connector.fallback_name + self.assertEqual(fallback_name, "yourtable") diff --git a/tests/unit_tests/connectors/test_pandas.py b/tests/unit_tests/connectors/test_pandas.py new file mode 100644 index 000000000..aad96f284 --- /dev/null +++ b/tests/unit_tests/connectors/test_pandas.py @@ -0,0 +1,46 @@ +import pandas as pd + +from pandasai.connectors import PandasConnector + + +class TestPandasConnector: + def test_load_dataframe_from_list(self): + input_data = [ + {"column1": 1, "column2": 4}, + {"column1": 2, "column2": 5}, + {"column1": 3, "column2": 6}, + ] + connector = PandasConnector({"original_df": input_data}) + assert isinstance(connector.execute(), pd.DataFrame) + + def test_load_dataframe_from_dict(self): + input_data = {"column1": [1, 2, 3], "column2": [4, 5, 6]} + connector = PandasConnector({"original_df": input_data}) + assert isinstance(connector.execute(), pd.DataFrame) + + def test_load_dataframe_from_pandas_dataframe(self): + input_data = pd.DataFrame({"column1": [1, 2, 3], "column2": [4, 5, 6]}) + connector = PandasConnector({"original_df": input_data}) + assert isinstance(connector.execute(), pd.DataFrame) + + def test_import_pandas_series(self): + input_data = pd.Series([1, 2, 3]) + connector = PandasConnector({"original_df": input_data}) + assert isinstance(connector.execute(), pd.DataFrame) + + def test_to_json(self): + input_data = pd.DataFrame( + { + "EmployeeID": [1, 2, 3, 4, 5], + "Name": ["John", "Emma", "Liam", "Olivia", "William"], + "Department": ["HR", "Sales", "IT", "Marketing", "Finance"], + } + ) + connector = PandasConnector({"original_df": input_data}) + data = connector.to_json() + + 
assert isinstance(data, dict) + assert "name" in data + assert "description" in data + assert "head" in data + assert isinstance(data["head"], list) diff --git a/tests/unit_tests/connectors/test_snowflake.py b/tests/unit_tests/connectors/test_snowflake.py index eeeb1acc7..0318bdff3 100644 --- a/tests/unit_tests/connectors/test_snowflake.py +++ b/tests/unit_tests/connectors/test_snowflake.py @@ -3,12 +3,12 @@ import pandas as pd -from pandasai.connectors import SnowFlakeConnector -from pandasai.connectors.base import SnowFlakeConnectorConfig +from pandasai.ee.connectors import SnowFlakeConnector +from pandasai.ee.connectors.snowflake import SnowFlakeConnectorConfig class TestSQLConnector(unittest.TestCase): - @patch("pandasai.connectors.snowflake.create_engine", autospec=True) + @patch("pandasai.ee.connectors.snowflake.create_engine", autospec=True) def setUp(self, mock_create_engine): # Create a mock engine and connection self.mock_engine = Mock() @@ -32,13 +32,13 @@ def setUp(self, mock_create_engine): # Create an instance of SQLConnector self.connector = SnowFlakeConnector(self.config) - @patch("pandasai.connectors.SnowFlakeConnector._load_connector_config") - @patch("pandasai.connectors.SnowFlakeConnector._init_connection") + @patch("pandasai.ee.connectors.SnowFlakeConnector._load_connector_config") + @patch("pandasai.ee.connectors.SnowFlakeConnector._init_connection") def test_constructor_and_properties( self, mock_load_connector_config, mock_init_connection ): # Test constructor and properties - self.assertEqual(self.connector._config, self.config) + self.assertEqual(self.connector.config, self.config) self.assertEqual(self.connector._engine, self.mock_engine) self.assertEqual(self.connector._connection, self.mock_connection) self.assertEqual(self.connector._cache_interval, 600) @@ -65,7 +65,7 @@ def test_build_query_method(self): self.assertEqual(str(query), expected_query) - @patch("pandasai.connectors.snowflake.pd.read_sql", autospec=True) + 
@patch("pandasai.ee.connectors.snowflake.pd.read_sql", autospec=True) def test_head_method(self, mock_read_sql): expected_data = pd.DataFrame({"Column1": [1, 2, 3], "Column2": [4, 5, 6]}) mock_read_sql.return_value = expected_data diff --git a/tests/unit_tests/connectors/test_sql.py b/tests/unit_tests/connectors/test_sql.py index 57abf2da4..d2d5092c2 100644 --- a/tests/unit_tests/connectors/test_sql.py +++ b/tests/unit_tests/connectors/test_sql.py @@ -3,8 +3,11 @@ import pandas as pd -from pandasai.connectors.base import SQLConnectorConfig -from pandasai.connectors.sql import PostgreSQLConnector, SQLConnector +from pandasai.connectors.sql import ( + PostgreSQLConnector, + SQLConnector, + SQLConnectorConfig, +) from pandasai.exceptions import MaliciousQueryError @@ -40,7 +43,7 @@ def test_constructor_and_properties( ): # Test constructor and properties - self.assertEqual(self.connector._config, self.config) + self.assertEqual(self.connector.config, self.config) self.assertEqual(self.connector._engine, self.mock_engine) self.assertEqual(self.connector._connection, self.mock_connection) self.assertEqual(self.connector._cache_interval, 600) diff --git a/tests/unit_tests/connectors/test_sqlite.py b/tests/unit_tests/connectors/test_sqlite.py index d146c313e..eff82745e 100644 --- a/tests/unit_tests/connectors/test_sqlite.py +++ b/tests/unit_tests/connectors/test_sqlite.py @@ -4,7 +4,7 @@ import pandas as pd from pandasai.connectors import SqliteConnector -from pandasai.connectors.base import SqliteConnectorConfig +from pandasai.connectors.sql import SqliteConnectorConfig class TestSqliteConnector(unittest.TestCase): @@ -27,7 +27,7 @@ def test_constructor_and_properties( self, mock_load_connector_config, mock_init_connection ): # Test constructor and properties - self.assertEqual(self.connector._config, self.config) + self.assertEqual(self.connector.config, self.config) self.assertEqual(self.connector._engine, self.mock_engine) self.assertEqual(self.connector._connection, 
self.mock_connection) self.assertEqual(self.connector._cache_interval, 600) diff --git a/tests/unit_tests/helpers/test_file_importer.py b/tests/unit_tests/helpers/test_file_importer.py new file mode 100644 index 000000000..c5a59ba89 --- /dev/null +++ b/tests/unit_tests/helpers/test_file_importer.py @@ -0,0 +1,49 @@ +""" +Unit tests for the FileImporter class +""" + +import pandas as pd +import pytest + +from pandasai.helpers.file_importer import FileImporter + + +class TestFileImporter: + """ + Unit tests for the FileImporter class + """ + + @pytest.fixture + def mocked_df(self): + return pd.DataFrame({"column1": [1, 2, 3], "column2": [4, 5, 6]}) + + def test_import_csv_file(self, mocked_df, mocker): + mocker.patch.object( + pd, + "read_csv", + return_value=mocked_df, + ) + df = FileImporter.import_from_file("sample.csv") + assert isinstance(df, pd.DataFrame) + assert df.equals(mocked_df) + + def test_import_parquet_file(self, mocked_df, mocker): + mocker.patch.object(pd, "read_parquet", return_value=mocked_df) + df = FileImporter.import_from_file("sample.parquet") + assert isinstance(df, pd.DataFrame) + assert df.equals(mocked_df) + + def test_import_excel_file(self, mocked_df, mocker): + mocker.patch.object( + pd, + "read_excel", + return_value=mocked_df, + ) + df = FileImporter.import_from_file("sample.xlsx") + assert isinstance(df, pd.DataFrame) + assert df.equals(mocked_df) + + @pytest.mark.parametrize("file_path", ["sample.txt", "sample.docx", "sample.pdf"]) + def test_invalid_file_format(self, file_path): + with pytest.raises(ValueError): + FileImporter.import_from_file(file_path) diff --git a/tests/unit_tests/helpers/test_openai_info.py b/tests/unit_tests/helpers/test_openai_info.py index 7a0d54084..0de197a39 100644 --- a/tests/unit_tests/helpers/test_openai_info.py +++ b/tests/unit_tests/helpers/test_openai_info.py @@ -1,7 +1,9 @@ +import os + import pandas as pd import pytest -from pandasai import SmartDataframe +from pandasai.agent import Agent from 
pandasai.helpers import ( OpenAICallbackHandler, get_openai_callback, @@ -151,7 +153,7 @@ def test_handler_azure_openai( @pytest.mark.parametrize( "model_name, expected_cost", [ - ("ft:gpt-3.5-turbo-0613:your-org:custom-model-name:1abcdefg", 0.024), + ("ft:gpt-3.5-turbo-0613:your-org:custom-model-name:1abcdefg", 0.028), ("gpt-35-turbo-0613.ft-0123456789abcdefghijklmnopqrstuv", 0.0035), ], ) @@ -174,6 +176,9 @@ def test_handler_finetuned_model( assert handler.total_cost == expected_cost def test_openai_callback(self, mocker): + os.environ["PANDASAI_API_URL"] = "" + os.environ["PANDASAI_API_KEY"] = "" + df = pd.DataFrame([1, 2, 3]) llm = OpenAI(api_token="test") llm_response = OpenAIObject( @@ -199,9 +204,17 @@ def test_openai_callback(self, mocker): ) mocker.patch.object(llm.client, "create", return_value=llm_response) - sdf = SmartDataframe(df, config={"llm": llm, "enable_cache": False}) + # Mock the check_if_related_to_conversation method to not + # perform additional api requests to OpenAI + mocker.patch.object( + Agent, + "check_if_related_to_conversation", + return_value=False, + ) + + agent = Agent([df], config={"llm": llm, "enable_cache": False}) with get_openai_callback() as cb: - sdf.chat("some question 1") + agent.chat("some question 1") assert cb.total_tokens == 3 assert cb.prompt_tokens == 2 assert cb.completion_tokens == 1 @@ -210,14 +223,14 @@ def test_openai_callback(self, mocker): total_tokens = cb.total_tokens with get_openai_callback() as cb: - sdf.chat("some question 2") - sdf.chat("some question 3") + agent.chat("some question 2") + agent.chat("some question 3") assert cb.total_tokens == total_tokens * 2 with get_openai_callback() as cb: - sdf.chat("some question 4") - sdf.chat("some question 5") - sdf.chat("some question 6") + agent.chat("some question 4") + agent.chat("some question 5") + agent.chat("some question 6") assert cb.total_tokens == total_tokens * 3 diff --git a/tests/unit_tests/llms/test_bamboo_llm.py 
b/tests/unit_tests/llms/test_bamboo_llm.py new file mode 100644 index 000000000..6077aa9f5 --- /dev/null +++ b/tests/unit_tests/llms/test_bamboo_llm.py @@ -0,0 +1,39 @@ +import unittest +from unittest.mock import MagicMock, patch + +from pandasai.llm.bamboo_llm import BambooLLM +from pandasai.prompts.base import BasePrompt + + +class TestBambooLLM(unittest.TestCase): + def get_prompt(self): + class MockBasePrompt(BasePrompt): + template: str = "instruction" + + def to_json(self): + return { + "code": ["print('Hello')", "for i in range(10): print(i)"], + "query": ["What is Chroma?", "How does it work?"], + } + + return MockBasePrompt() + + def get_context(self): + return MagicMock() + + @patch("pandasai.helpers.request.Session.make_request", autospec=True) + def test_call_method(self, mock_request): + prompt = self.get_prompt() + context = self.get_context() + bllm = BambooLLM(api_key="dummy_key") + bllm.call(prompt, context) + call_args = mock_request.call_args_list[0][0] + mock_request.assert_called_once() + assert call_args[1] == "POST" + assert call_args[2] == "/llm/chat" + assert mock_request.call_args_list[0][1] == { + "json": { + "code": ["print('Hello')", "for i in range(10): print(i)"], + "query": ["What is Chroma?", "How does it work?"], + } + } diff --git a/tests/unit_tests/llms/test_base_hf.py b/tests/unit_tests/llms/test_base_hf.py deleted file mode 100644 index 6ebaa6fcc..000000000 --- a/tests/unit_tests/llms/test_base_hf.py +++ /dev/null @@ -1,103 +0,0 @@ -"""Unit tests for the base huggingface LLM class""" - -import pytest -import requests - -from pandasai.exceptions import LLMResponseHTTPError -from pandasai.llm.base import HuggingFaceLLM -from pandasai.prompts import AbstractPrompt - - -class TestBaseHfLLM: - """Unit tests for the huggingface LLM class""" - - @pytest.fixture - def api_response(self): - return [{"generated_text": "Some text"}] - - @pytest.fixture - def api_response_401(self): - return {"error": "Authorization header is correct, but 
the token seems invalid"} - - @pytest.fixture - def prompt(self): - class MockAbstractPrompt(AbstractPrompt): - template: str = "instruction" - - return MockAbstractPrompt() - - def test_type(self): - assert HuggingFaceLLM(api_token="test_token").type == "huggingface-llm" - - def test_api_url(self): - assert ( - HuggingFaceLLM(api_token="test_token")._api_url - == "https://api-inference.huggingface.co/models/" - ) - - def test_query(self, mocker, api_response): - response_mock = mocker.Mock() - response_mock.status_code = 200 - response_mock.json.return_value = api_response - mocker.patch("requests.post", return_value=response_mock) - - # Call the query method - llm = HuggingFaceLLM(api_token="test_token") - payload = {"inputs": "Some input text"} - result = llm.query(payload) - - # Check that the mock was called correctly - requests.post.assert_called_once_with( - llm._api_url, - headers={"Authorization": "Bearer test_token"}, - json=payload, - timeout=60, - ) - - # Check that the result is correct - assert result == api_response[0]["generated_text"] - - def test_query_http_error_401(self, mocker, api_response_401): - response_mock = mocker.Mock() - response_mock.status_code = 401 - response_mock.json.return_value = api_response_401 - mocker.patch("requests.post", return_value=response_mock) - - llm = HuggingFaceLLM(api_token="test_token") - payload = {"inputs": "Some input text"} - - with pytest.raises(LLMResponseHTTPError) as exc: - llm.query(payload) - - assert api_response_401.get("error") in str(exc.value) - - requests.post.assert_called_once_with( - llm._api_url, - headers={"Authorization": "Bearer test_token"}, - json=payload, - timeout=60, - ) - - def test_call(self, mocker, prompt): - huggingface = HuggingFaceLLM(api_token="test_token") - - mocker.patch.object(huggingface, "call", return_value="Generated text") - - result = huggingface.call(prompt, "value", "suffix") - assert result == "Generated text" - - def test_call_removes_original_prompt(self, 
mocker): - huggingface = HuggingFaceLLM(api_token="test_token") - - class MockAbstractPrompt(AbstractPrompt): - template: str = "instruction " - - instruction = MockAbstractPrompt() - suffix = "suffix " - - mocker.patch.object( - huggingface, "query", return_value="instruction suffix generated text" - ) - - result = huggingface.call(instruction, suffix) - assert result == "generated text" diff --git a/tests/unit_tests/llms/test_base_llm.py b/tests/unit_tests/llms/test_base_llm.py index 8f044adc3..e41f3dd62 100644 --- a/tests/unit_tests/llms/test_base_llm.py +++ b/tests/unit_tests/llms/test_base_llm.py @@ -3,6 +3,7 @@ import pytest from pandasai.exceptions import APIKeyNotFoundError +from pandasai.helpers.memory import Memory from pandasai.llm import LLM @@ -64,3 +65,37 @@ def test_extract_code(self): """ assert LLM()._extract_code(code) == "print('Hello World')" + + def test_get_system_prompt_empty_memory(self): + assert LLM().get_system_prompt(Memory()) == "\n" + + def test_get_system_prompt_memory_with_agent_info(self): + mem = Memory(agent_info="xyz") + assert LLM().get_system_prompt(mem) == " xyz \n" + + def test_get_system_prompt_memory_with_agent_info_messages(self): + mem = Memory(agent_info="xyz", memory_size=10) + mem.add("hello world", True) + mem.add('print("hello world)', False) + mem.add("hello world", True) + print(mem.get_messages()) + assert ( + LLM().get_system_prompt(mem) + == ' xyz \n\n### PREVIOUS CONVERSATION\n### QUERY\n hello world\n### ANSWER\n print("hello world)\n' + ) + + def test_prepend_system_prompt_with_empty_mem(self): + assert LLM().prepend_system_prompt("hello world", Memory()) == "\nhello world" + + def test_prepend_system_prompt_with_non_empty_mem(self): + mem = Memory(agent_info="xyz", memory_size=10) + mem.add("hello world", True) + mem.add('print("hello world)', False) + mem.add("hello world", True) + assert ( + LLM().prepend_system_prompt("hello world", mem) + == ' xyz \n\n### PREVIOUS CONVERSATION\n### QUERY\n hello 
world\n### ANSWER\n print("hello world)\nhello world' + ) + + def test_prepend_system_prompt_with_memory_none(self): + assert LLM().prepend_system_prompt("hello world", None) == "hello world" diff --git a/tests/unit_tests/llms/test_falcon.py b/tests/unit_tests/llms/test_falcon.py deleted file mode 100644 index e9d4eea0b..000000000 --- a/tests/unit_tests/llms/test_falcon.py +++ /dev/null @@ -1,10 +0,0 @@ -"""Unit tests for the falcon LLM class""" - -from pandasai.llm import Falcon - - -class TestFalconLLM: - """Unit tests for the Falcon LLM class""" - - def test_type(self): - assert Falcon(api_token="test").type == "falcon" diff --git a/tests/unit_tests/llms/test_google_palm.py b/tests/unit_tests/llms/test_google_palm.py index 9c96d7e82..40d32e3cc 100644 --- a/tests/unit_tests/llms/test_google_palm.py +++ b/tests/unit_tests/llms/test_google_palm.py @@ -1,12 +1,13 @@ """Unit tests for the openai LLM class""" import re +from unittest.mock import MagicMock import pytest from google import generativeai from pandasai.exceptions import APIKeyNotFoundError from pandasai.llm import GooglePalm -from pandasai.prompts import AbstractPrompt +from pandasai.prompts import BasePrompt class MockedCompletion: @@ -19,10 +20,14 @@ class TestGooglePalm: @pytest.fixture def prompt(self): - class MockAbstractPrompt(AbstractPrompt): + class MockBasePrompt(BasePrompt): template: str = "Hello" - return MockAbstractPrompt() + return MockBasePrompt() + + @pytest.fixture + def context(self): + return MagicMock() def test_type_without_token(self): with pytest.raises(APIKeyNotFoundError): @@ -47,44 +52,44 @@ def test_params_setting(self): assert llm.top_k == 50 assert llm.max_output_tokens == 64 - def test_validations(self, prompt): + def test_validations(self, prompt, context): with pytest.raises( ValueError, match=re.escape("temperature must be in the range [0.0, 1.0]") ): - GooglePalm(api_key="test", temperature=-1).call(prompt, "World") + GooglePalm(api_key="test", 
temperature=-1).call(prompt, context) with pytest.raises( ValueError, match=re.escape("temperature must be in the range [0.0, 1.0]") ): - GooglePalm(api_key="test", temperature=1.1).call(prompt, "World") + GooglePalm(api_key="test", temperature=1.1).call(prompt, context) with pytest.raises( ValueError, match=re.escape("top_p must be in the range [0.0, 1.0]") ): - GooglePalm(api_key="test", top_p=-1).call(prompt, "World") + GooglePalm(api_key="test", top_p=-1).call(prompt, context) with pytest.raises( ValueError, match=re.escape("top_p must be in the range [0.0, 1.0]") ): - GooglePalm(api_key="test", top_p=1.1).call(prompt, "World") + GooglePalm(api_key="test", top_p=1.1).call(prompt, context) with pytest.raises( ValueError, match=re.escape("top_k must be in the range [0.0, 100.0]") ): - GooglePalm(api_key="test", top_k=-100).call(prompt, "World") + GooglePalm(api_key="test", top_k=-100).call(prompt, context) with pytest.raises( ValueError, match=re.escape("top_k must be in the range [0.0, 100.0]") ): - GooglePalm(api_key="test", top_k=110).call(prompt, "World") + GooglePalm(api_key="test", top_k=110).call(prompt, context) with pytest.raises( ValueError, match=re.escape("max_output_tokens must be greater than zero") ): - GooglePalm(api_key="test", max_output_tokens=0).call(prompt, "World") + GooglePalm(api_key="test", max_output_tokens=0).call(prompt, context) with pytest.raises(ValueError, match=re.escape("model is required.")): - GooglePalm(api_key="test", model="").call(prompt, "World") + GooglePalm(api_key="test", model="").call(prompt, context) def test_text_generation(self, mocker): llm = GooglePalm(api_key="test") @@ -105,5 +110,5 @@ def test_call(self, mocker, prompt): generativeai, "generate_text", return_value=expected_response ) - result = llm.call(instruction=prompt, suffix="!") + result = llm.call(instruction=prompt) assert result == expected_text diff --git a/tests/unit_tests/llms/test_google_vertexai.py b/tests/unit_tests/llms/test_google_vertexai.py 
index 7f4a63299..c92d6ee5d 100644 --- a/tests/unit_tests/llms/test_google_vertexai.py +++ b/tests/unit_tests/llms/test_google_vertexai.py @@ -59,3 +59,7 @@ def test_validate_without_model(self, google_vertexai: GoogleVertexAI): google_vertexai.model = None with pytest.raises(ValueError, match="model is required."): google_vertexai._validate() + + def test_validate_with_code_chat_model(self, google_vertexai: GoogleVertexAI): + google_vertexai.model = "codechat-bison@001" + google_vertexai._validate() diff --git a/tests/unit_tests/llms/test_huggingface_text_gen.py b/tests/unit_tests/llms/test_huggingface_text_gen.py index 0ef1712ec..9266f38f3 100644 --- a/tests/unit_tests/llms/test_huggingface_text_gen.py +++ b/tests/unit_tests/llms/test_huggingface_text_gen.py @@ -1,9 +1,9 @@ """Unit tests for the LLaMa2TextGen LLM class""" from pandasai.llm import HuggingFaceTextGen -from pandasai.prompts import AbstractPrompt +from pandasai.prompts import BasePrompt -class MockAbstractPrompt(AbstractPrompt): +class MockBasePrompt(BasePrompt): template: str = "instruction." 
@@ -54,7 +54,7 @@ def test_completion(self, mocker): llm = HuggingFaceTextGen(inference_server_url="http://127.0.0.1:8080") - instruction = MockAbstractPrompt() + instruction = MockBasePrompt() result = llm.call(instruction) tgi_mock.assert_called_once_with( diff --git a/tests/unit_tests/llms/test_langchain_llm.py b/tests/unit_tests/llms/test_langchain_llm.py index 598a89edd..779285cfa 100644 --- a/tests/unit_tests/llms/test_langchain_llm.py +++ b/tests/unit_tests/llms/test_langchain_llm.py @@ -1,6 +1,5 @@ """Unit tests for the base LLM class""" - import pytest from langchain_community.chat_models import ChatOpenAI from langchain_community.llms import OpenAI @@ -12,7 +11,7 @@ ) from pandasai.llm import LangchainLLM -from pandasai.prompts import AbstractPrompt +from pandasai.prompts import BasePrompt class TestLangchainLLM: @@ -44,10 +43,10 @@ def generate(self, prompts, stop=None, run_manager=None, **kwargs): @pytest.fixture def prompt(self): - class MockAbstractPrompt(AbstractPrompt): + class MockBasePrompt(BasePrompt): template: str = "Hello" - return MockAbstractPrompt() + return MockBasePrompt() def test_langchain_llm_type(self, langchain_llm): langchain_wrapper = LangchainLLM(langchain_llm) diff --git a/tests/unit_tests/llms/test_openai.py b/tests/unit_tests/llms/test_openai.py index 5f69a5c02..e9e333507 100644 --- a/tests/unit_tests/llms/test_openai.py +++ b/tests/unit_tests/llms/test_openai.py @@ -4,7 +4,7 @@ from pandasai.exceptions import APIKeyNotFoundError, UnsupportedModelError from pandasai.llm import OpenAI -from pandasai.prompts import AbstractPrompt +from pandasai.prompts import BasePrompt class OpenAIObject: @@ -17,10 +17,10 @@ class TestOpenAILLM: @pytest.fixture def prompt(self): - class MockAbstractPrompt(AbstractPrompt): + class MockBasePrompt(BasePrompt): template: str = "instruction" - return MockAbstractPrompt() + return MockBasePrompt() def test_type_without_token(self): with pytest.raises(APIKeyNotFoundError): @@ -125,7 +125,7 @@ def 
test_call_supported_chat_model(self, mocker, prompt): result = openai.call(instruction=prompt) assert result == "response" - def test_call_finetuned_model(self, mocker, prompt): + def test_call_with_system_prompt(self, mocker, prompt): openai = OpenAI( api_token="test", model="ft:gpt-3.5-turbo:my-org:custom_suffix:id" ) diff --git a/tests/unit_tests/llms/test_starcoder.py b/tests/unit_tests/llms/test_starcoder.py deleted file mode 100644 index 84b4ea515..000000000 --- a/tests/unit_tests/llms/test_starcoder.py +++ /dev/null @@ -1,10 +0,0 @@ -"""Unit tests for the starcoder LLM class""" - -from pandasai.llm import Starcoder - - -class TestStarcoderLLM: - """Unit tests for the Starcoder LLM class""" - - def test_type(self): - assert Starcoder(api_token="test").type == "starcoder" diff --git a/tests/unit_tests/pipelines/smart_datalake/test_code_execution.py b/tests/unit_tests/pipelines/smart_datalake/test_code_execution.py index fa2cbd7ee..d6ec77f20 100644 --- a/tests/unit_tests/pipelines/smart_datalake/test_code_execution.py +++ b/tests/unit_tests/pipelines/smart_datalake/test_code_execution.py @@ -1,24 +1,18 @@ from typing import Optional -from unittest.mock import MagicMock, Mock +from unittest.mock import Mock import pandas as pd import pytest -from pandasai.exceptions import InvalidLLMOutputType from pandasai.helpers.logger import Logger from pandasai.helpers.skills_manager import SkillsManager from pandasai.llm.fake import FakeLLM +from pandasai.pipelines.chat.code_execution import CodeExecution from pandasai.pipelines.pipeline_context import PipelineContext -from pandasai.pipelines.smart_datalake_chat.code_execution import CodeExecution -from pandasai.prompts.correct_error_prompt import CorrectErrorPrompt -from pandasai.prompts.correct_output_type_error_prompt import ( - CorrectOutputTypeErrorPrompt, -) -from pandasai.smart_dataframe import SmartDataframe class TestCodeExecution: - "Unit test for Smart Data Lake Code Execution" + "Unit test for Code Execution" 
throw_exception = True @@ -69,10 +63,6 @@ def sample_df(self): } ) - @pytest.fixture - def smart_dataframe(self, llm, sample_df): - return SmartDataframe(sample_df, config={"llm": llm, "enable_cache": True}) - @pytest.fixture def config(self, llm): return {"llm": llm, "enable_cache": True} @@ -105,16 +95,20 @@ def mock_intermediate_values(key: str): elif key == "code_manager": return mock_code_manager - context.get_intermediate_value = Mock(side_effect=mock_intermediate_values) - context._query_exec_tracker = Mock() - context.query_exec_tracker.execute_func = Mock(return_value="Mocked Result") + context.get = Mock(side_effect=mock_intermediate_values) + # context._query_exec_tracker = Mock() + # context.query_exec_tracker.execute_func = Mock(return_value="Mocked Result") result = code_execution.execute( - input="Test Code", context=context, logger=logger + input='result={"type":"string", "value":"5"}', + context=context, + logger=logger, ) assert isinstance(code_execution, CodeExecution) - assert result == "Mocked Result" + assert result.output == {"type": "string", "value": "5"} + assert result.message == "Code Executed Successfully" + assert result.success is True def test_code_execution_unsuccessful_after_retries(self, context, logger): # Test Flow : Code Execution Successful after retry @@ -126,15 +120,6 @@ def mock_execute_code(*args, **kwargs): mock_code_manager = Mock() mock_code_manager.execute_code = Mock(side_effect=mock_execute_code) - context._query_exec_tracker = Mock() - context.query_exec_tracker.execute_func = Mock( - return_value=[ - "Interuppted Code", - "Exception Testing", - "Unsuccessful after Retries", - ] - ) - def mock_intermediate_values(key: str): if key == "last_prompt_id": return "Mocked Prompt ID" @@ -143,7 +128,7 @@ def mock_intermediate_values(key: str): elif key == "code_manager": return mock_code_manager - context.get_intermediate_value = Mock(side_effect=mock_intermediate_values) + context.get = 
Mock(side_effect=mock_intermediate_values) assert isinstance(code_execution, CodeExecution) @@ -167,60 +152,20 @@ def mock_execute_code(*args, **kwargs): # Conditional return of execute_func method based arguments it is called with def mock_execute_func(*args, **kwargs): - if isinstance(args[0], Mock) and args[0].name == "execute_code": - return mock_execute_code(*args, **kwargs) - else: - return [ - "Interuppted Code", - "Exception Testing", - "Successful after Retry", - ] + return mock_execute_code(*args, **kwargs) mock_code_manager = Mock() - mock_code_manager.execute_code = Mock() + mock_code_manager.execute_code = mock_execute_func mock_code_manager.execute_code.name = "execute_code" - context._query_exec_tracker = Mock() - - context.query_exec_tracker.execute_func = Mock(side_effect=mock_execute_func) - - def mock_intermediate_values(key: str): - if key == "last_prompt_id": - return "Mocked Prompt ID" - elif key == "skills": - return SkillsManager() - elif key == "code_manager": - return mock_code_manager - - context.get_intermediate_value = Mock(side_effect=mock_intermediate_values) - - result = code_execution.execute( - input="Test Code", context=context, logger=logger + code_execution._retry_run_code = Mock( + return_value='result={"type":"string", "value":"5"}' ) - assert isinstance(code_execution, CodeExecution) - assert result == "Mocked Result after retry" - - def test_get_error_prompt_invalid_llm_output_type(self): - code_execution = CodeExecution() - - # Mock the InvalidLLMOutputType exception - mock_exception = MagicMock(spec=InvalidLLMOutputType) - - # Call the method with the mock exception - result = code_execution._get_error_prompt(mock_exception) + result = code_execution.execute(input="x=5", context=context, logger=logger) - # Assert that the CorrectOutputTypeErrorPrompt is returned - assert isinstance(result, CorrectOutputTypeErrorPrompt) - - def test_get_error_prompt_other_exception(self): - code_execution = CodeExecution() - - # Mock a 
generic exception - mock_exception = MagicMock(spec=Exception) - - # Call the method with the mock exception - result = code_execution._get_error_prompt(mock_exception) - - # Assert that the CorrectErrorPrompt is returned - assert isinstance(result, CorrectErrorPrompt) + assert code_execution._retry_run_code.assert_called + assert isinstance(code_execution, CodeExecution) + assert result.output == {"type": "string", "value": "5"} + assert result.message == "Code Executed Successfully" + assert result.success is True diff --git a/tests/unit_tests/pipelines/smart_datalake/test_code_generator.py b/tests/unit_tests/pipelines/smart_datalake/test_code_generator.py index 3b139d2cb..d9358ad25 100644 --- a/tests/unit_tests/pipelines/smart_datalake/test_code_generator.py +++ b/tests/unit_tests/pipelines/smart_datalake/test_code_generator.py @@ -1,21 +1,18 @@ from typing import Optional -from unittest.mock import Mock +from unittest.mock import Mock, patch import pandas as pd import pytest from pandasai.helpers.logger import Logger -from pandasai.helpers.output_types import output_type_factory -from pandasai.helpers.viz_library_types import viz_lib_type_factory from pandasai.llm.fake import FakeLLM +from pandasai.pipelines.chat.code_generator import CodeGenerator from pandasai.pipelines.pipeline_context import PipelineContext -from pandasai.pipelines.smart_datalake_chat.code_generator import CodeGenerator from pandasai.prompts.generate_python_code import GeneratePythonCodePrompt -from pandasai.smart_dataframe import SmartDataframe class TestCodeGenerator: - "Unit test for Smart Data Lake Code Generator" + "Unit test for Code Generator" @pytest.fixture def llm(self, output: Optional[str] = None): @@ -64,10 +61,6 @@ def sample_df(self): } ) - @pytest.fixture - def smart_dataframe(self, llm, sample_df): - return SmartDataframe(sample_df, config={"llm": llm, "enable_cache": True}) - @pytest.fixture def config(self, llm): return {"llm": llm, "enable_cache": True} @@ -85,17 +78,18 
@@ def test_init(self, context, config): code_generator = CodeGenerator() assert isinstance(code_generator, CodeGenerator) - def test_code_not_found_in_cache(self, context, logger): + @patch("pandasai.llm.fake.FakeLLM.call") + def test_code_not_found_in_cache(self, mock_call, context, logger): # Test Flow : Code Not found in the cache code_generator = CodeGenerator() mock_get_promt = Mock(return_value=GeneratePythonCodePrompt) def mock_intermediate_values(key: str): - if key == "output_type_helper": - return output_type_factory("DefaultOutputType") + if key == "output_type": + return "" elif key == "viz_lib_helper": - return viz_lib_type_factory("DefaultVizLibraryType") + return "plotly" elif key == "get_prompt": return mock_get_promt @@ -104,13 +98,15 @@ def mock_execute_func(function, *args, **kwargs): return mock_get_promt() return "Mocked LLM Generated Code" - context.get_intermediate_value = Mock(side_effect=mock_intermediate_values) + context.get = Mock(side_effect=mock_intermediate_values) context._cache = Mock() - context.cache.get = Mock(return_value=None) - context._query_exec_tracker = Mock() - context.query_exec_tracker.execute_func = Mock(side_effect=mock_execute_func) + context._cache.get = Mock(return_value=None) - code = code_generator.execute(input=None, context=context, logger=logger) + mock_call.return_value = "test_output" + + result = code_generator.execute( + input="test_input", context=context, logger=logger + ) assert isinstance(code_generator, CodeGenerator) - assert code == "Mocked LLM Generated Code" + assert result.output == "test_output" diff --git a/tests/unit_tests/pipelines/smart_datalake/test_error_prompt_generation.py b/tests/unit_tests/pipelines/smart_datalake/test_error_prompt_generation.py new file mode 100644 index 000000000..1cddaca99 --- /dev/null +++ b/tests/unit_tests/pipelines/smart_datalake/test_error_prompt_generation.py @@ -0,0 +1,111 @@ +from typing import Optional +from unittest.mock import MagicMock + +import pandas 
as pd +import pytest + +from pandasai.exceptions import InvalidLLMOutputType +from pandasai.llm.fake import FakeLLM +from pandasai.pipelines.chat.error_correction_pipeline.error_prompt_generation import ( + ErrorPromptGeneration, +) +from pandasai.pipelines.pipeline_context import PipelineContext +from pandasai.prompts.correct_error_prompt import CorrectErrorPrompt +from pandasai.prompts.correct_output_type_error_prompt import ( + CorrectOutputTypeErrorPrompt, +) + + +class TestErrorPromptGeneration: + "Unit test for Prompt Generation" + + @pytest.fixture + def llm(self, output: Optional[str] = None): + return FakeLLM(output=output) + + @pytest.fixture + def sample_df(self): + return pd.DataFrame( + { + "country": [ + "United States", + "United Kingdom", + "France", + "Germany", + "Italy", + "Spain", + "Canada", + "Australia", + "Japan", + "China", + ], + "gdp": [ + 19294482071552, + 2891615567872, + 2411255037952, + 3435817336832, + 1745433788416, + 1181205135360, + 1607402389504, + 1490967855104, + 4380756541440, + 14631844184064, + ], + "happiness_index": [ + 6.94, + 7.16, + 6.66, + 7.07, + 6.38, + 6.4, + 7.23, + 7.22, + 5.87, + 5.12, + ], + } + ) + + @pytest.fixture + def config(self, llm): + return {"llm": llm, "enable_cache": True} + + @pytest.fixture + def context(self, sample_df, config): + return PipelineContext([sample_df], config) + + def test_init(self): + # Test the initialization of the PromptGeneration + prompt_generation = ErrorPromptGeneration() + assert isinstance(prompt_generation, ErrorPromptGeneration) + + def test_get_error_prompt_invalid_llm_output_type(self, context): + error_prompt = ErrorPromptGeneration() + + # Mock the InvalidLLMOutputType exception + mock_exception = MagicMock(spec=InvalidLLMOutputType) + + error_prompt.context = context + + # Call the method with the mock exception + result = error_prompt.get_prompt(mock_exception, "code") + + # Call the method with the mock exception + result = error_prompt.get_prompt(mock_exception, 
"code") + + # Assert that the CorrectOutputTypeErrorPrompt is returned + assert isinstance(result, CorrectOutputTypeErrorPrompt) + + def test_get_error_prompt_other_exception(self, context): + # Mock a generic exception + mock_exception = MagicMock(spec=Exception) + + error_prompt = ErrorPromptGeneration() + + error_prompt.context = context + + # Call the method with the mock exception + result = error_prompt.get_prompt(mock_exception, "code") + + # Assert that the CorrectErrorPrompt is returned + assert isinstance(result, CorrectErrorPrompt) diff --git a/tests/unit_tests/pipelines/smart_datalake/test_prompt_generation.py b/tests/unit_tests/pipelines/smart_datalake/test_prompt_generation.py index ca0913a6a..90e7bb651 100644 --- a/tests/unit_tests/pipelines/smart_datalake/test_prompt_generation.py +++ b/tests/unit_tests/pipelines/smart_datalake/test_prompt_generation.py @@ -3,15 +3,18 @@ import pandas as pd import pytest +from pandasai.connectors import PandasConnector from pandasai.llm.fake import FakeLLM +from pandasai.pipelines.chat.prompt_generation import PromptGeneration from pandasai.pipelines.pipeline_context import PipelineContext -from pandasai.pipelines.smart_datalake_chat.prompt_generation import PromptGeneration -from pandasai.prompts.direct_sql_prompt import DirectSQLPrompt from pandasai.prompts.generate_python_code import GeneratePythonCodePrompt +from pandasai.prompts.generate_python_code_with_sql import ( + GeneratePythonCodeWithSQLPrompt, +) class TestPromptGeneration: - "Unit test for Smart Data Lake Prompt Generation" + "Unit test for Prompt Generation" @pytest.fixture def llm(self, output: Optional[str] = None): @@ -60,13 +63,17 @@ def sample_df(self): } ) + @pytest.fixture + def dataframe(self, sample_df): + return PandasConnector({"original_df": sample_df}) + @pytest.fixture def config(self, llm): return {"llm": llm, "enable_cache": True} @pytest.fixture - def context(self, sample_df, config): - return PipelineContext([sample_df], config) + 
def context(self, dataframe, config): + return PipelineContext([dataframe], config) def test_init(self): # Test the initialization of the PromptGeneration @@ -78,15 +85,11 @@ def test_get_chat_prompt(self, context): prompt_generation = PromptGeneration() context.config.direct_sql = True - gen_key, gen_prompt = prompt_generation._get_chat_prompt(context) - expected_key = "direct_sql_prompt" - assert gen_key == expected_key - assert isinstance(gen_prompt, DirectSQLPrompt) + gen_prompt = prompt_generation.get_chat_prompt(context) + assert isinstance(gen_prompt, GeneratePythonCodeWithSQLPrompt) # Test case 2: direct_sql is False context.config.direct_sql = False - gen_key, gen_prompt = prompt_generation._get_chat_prompt(context) - expected_key = "generate_python_code" - assert gen_key == expected_key + gen_prompt = prompt_generation.get_chat_prompt(context) assert isinstance(gen_prompt, GeneratePythonCodePrompt) diff --git a/tests/unit_tests/pipelines/smart_datalake/test_result_parsing.py b/tests/unit_tests/pipelines/smart_datalake/test_result_parsing.py index fc141df37..432b89e8e 100644 --- a/tests/unit_tests/pipelines/smart_datalake/test_result_parsing.py +++ b/tests/unit_tests/pipelines/smart_datalake/test_result_parsing.py @@ -6,13 +6,12 @@ from pandasai.helpers.logger import Logger from pandasai.llm.fake import FakeLLM +from pandasai.pipelines.chat.result_parsing import ResultParsing from pandasai.pipelines.pipeline_context import PipelineContext -from pandasai.pipelines.smart_datalake_chat.result_parsing import ResultParsing -from pandasai.smart_dataframe import SmartDataframe class TestResultParsing: - "Unit test for Smart Data Lake Result Parsing" + "Unit test for Result Parsing" throw_exception = True @@ -63,10 +62,6 @@ def sample_df(self): } ) - @pytest.fixture - def smart_dataframe(self, llm, sample_df): - return SmartDataframe(sample_df, config={"llm": llm, "enable_cache": True}) - @pytest.fixture def config(self, llm): return {"llm": llm, "enable_cache": 
True} @@ -89,23 +84,24 @@ def test_result_parsing_successful_with_no_exceptions(self, context, logger): result_parsing = ResultParsing() result_parsing._add_result_to_memory = Mock() mock_response_parser = Mock() - context._query_exec_tracker = Mock() - context.query_exec_tracker.execute_func = Mock( - return_value="Mocked Parsed Result" - ) def mock_intermediate_values(key: str): if key == "response_parser": return mock_response_parser - context.get_intermediate_value = Mock(side_effect=mock_intermediate_values) + context.get = Mock(side_effect=mock_intermediate_values) result = result_parsing.execute( - input="Test Result", context=context, logger=logger + input={"type": "string", "value": "Test Result"}, + context=context, + logger=logger, ) assert isinstance(result_parsing, ResultParsing) - assert result == "Mocked Parsed Result" + assert result.output == "Test Result" + assert result.success is True + assert result.message == "Results parsed successfully" + assert result.metadata is None def test_result_parsing_unsuccessful_with_exceptions(self, context, logger): # Test Flow : Code Execution Unsuccessful with exceptions @@ -116,14 +112,11 @@ def test_result_parsing_unsuccessful_with_exceptions(self, context, logger): def mock_result_parsing(*args, **kwargs): raise Exception("Unit test exception") - context._query_exec_tracker = Mock() - context.query_exec_tracker.execute_func = Mock(side_effect=mock_result_parsing) - def mock_intermediate_values(key: str): if key == "response_parser": return mock_response_parser - context.get_intermediate_value = Mock(side_effect=mock_intermediate_values) + context.get = Mock(side_effect=mock_intermediate_values) result = None try: diff --git a/tests/unit_tests/pipelines/smart_datalake/test_result_validation.py b/tests/unit_tests/pipelines/smart_datalake/test_result_validation.py index ee659dfa8..541226bad 100644 --- a/tests/unit_tests/pipelines/smart_datalake/test_result_validation.py +++ 
b/tests/unit_tests/pipelines/smart_datalake/test_result_validation.py @@ -6,13 +6,12 @@ from pandasai.helpers.logger import Logger from pandasai.llm.fake import FakeLLM +from pandasai.pipelines.chat.result_validation import ResultValidation from pandasai.pipelines.pipeline_context import PipelineContext -from pandasai.pipelines.smart_datalake_chat.result_validation import ResultValidation -from pandasai.smart_dataframe import SmartDataframe class TestResultValidation: - "Unit test for Smart Data Lake Result Validation" + "Unit test for Result Validation" throw_exception = True @@ -63,10 +62,6 @@ def sample_df(self): } ) - @pytest.fixture - def smart_dataframe(self, llm, sample_df): - return SmartDataframe(sample_df, config={"llm": llm, "enable_cache": True}) - @pytest.fixture def config(self, llm): return {"llm": llm, "enable_cache": True} @@ -88,76 +83,53 @@ def test_result_is_none(self, context, logger): # Test Flow : Code Execution Successful with no exceptions result_validation = ResultValidation() - context._query_exec_tracker = Mock() - context.query_exec_tracker.get_execution_time = Mock() - context.query_exec_tracker.add_step = Mock() - result = result_validation.execute(input=None, context=context, logger=logger) - assert not context.query_exec_tracker.add_step.called + print(result) + assert isinstance(result_validation, ResultValidation) - assert result is None + assert result.output is None def test_result_is_not_of_dict_type(self, context, logger): # Test Flow : Code Execution Successful with no exceptions result_validation = ResultValidation() - context._query_exec_tracker = Mock() - context.query_exec_tracker.get_execution_time = Mock() - context.query_exec_tracker.add_step = Mock() - result = result_validation.execute( input="Not Dict Type Result", context=context, logger=logger ) - assert not context.query_exec_tracker.add_step.called assert isinstance(result_validation, ResultValidation) - assert result == "Not Dict Type Result" + assert 
result.output == "Not Dict Type Result" + assert result.success is False + assert result.message is None def test_result_is_of_dict_type_and_valid(self, context, logger): # Test Flow : Code Execution Successful with no exceptions - result_validation = ResultValidation() - output_type_helper = Mock() - - context._query_exec_tracker = Mock() - context.query_exec_tracker.get_execution_time = Mock() - context.get_intermediate_value = Mock(return_value=output_type_helper) - output_type_helper.validate = Mock(return_value=(True, "Mocked Logs")) + context.get = Mock(return_value="") + result_validation = ResultValidation() result = result_validation.execute( input={"Mocked": "Result"}, context=context, logger=logger ) - context.query_exec_tracker.add_step.assert_called_with( - { - "type": "Validating Output", - "success": True, - "message": "Output Validation Successful", - } - ) assert isinstance(result_validation, ResultValidation) - assert result == {"Mocked": "Result"} + assert result.output == {"Mocked": "Result"} + assert result.success is True + assert result.message == "Output Validation Successful" def test_result_is_of_dict_type_and_not_valid(self, context, logger): # Test Flow : Code Execution Successful with no exceptions result_validation = ResultValidation() - output_type_helper = Mock() + output_type = Mock() - context._query_exec_tracker = Mock() - context.query_exec_tracker.get_execution_time = Mock() - context.get_intermediate_value = Mock(return_value=output_type_helper) - output_type_helper.validate = Mock(return_value=(False, "Mocked Logs")) + context.get = Mock(return_value=output_type) + output_type.validate = Mock(return_value=(False, "Mocked Logs")) result = result_validation.execute( input={"Mocked": "Result"}, context=context, logger=logger ) - context.query_exec_tracker.add_step.assert_called_with( - { - "type": "Validating Output", - "success": False, - "message": "Output Validation Failed", - } - ) assert isinstance(result_validation, 
ResultValidation) - assert result == {"Mocked": "Result"} + assert result.output == {"Mocked": "Result"} + assert result.success is False + assert result.message == "Output Validation Failed" diff --git a/tests/unit_tests/pipelines/smart_datalake/test_validate_pipeline_input.py b/tests/unit_tests/pipelines/smart_datalake/test_validate_pipeline_input.py new file mode 100644 index 000000000..33e006018 --- /dev/null +++ b/tests/unit_tests/pipelines/smart_datalake/test_validate_pipeline_input.py @@ -0,0 +1,196 @@ +from typing import Optional +from unittest.mock import patch + +import pandas as pd +import pytest + +from pandasai.connectors.sql import ( + PostgreSQLConnector, + SQLConnector, + SQLConnectorConfig, +) +from pandasai.exceptions import InvalidConfigError +from pandasai.helpers.logger import Logger +from pandasai.llm.fake import FakeLLM +from pandasai.pipelines.chat.validate_pipeline_input import ( + ValidatePipelineInput, +) +from pandasai.pipelines.logic_unit_output import LogicUnitOutput +from pandasai.pipelines.pipeline_context import PipelineContext + + +class TestValidatePipelineInput: + "Unit test for Validate Pipeline Input" + + @pytest.fixture + def llm(self, output: Optional[str] = None): + return FakeLLM(output=output) + + @pytest.fixture + def sample_df(self): + return pd.DataFrame( + { + "country": [ + "United States", + "United Kingdom", + "France", + "Germany", + "Italy", + "Spain", + "Canada", + "Australia", + "Japan", + "China", + ], + "gdp": [ + 19294482071552, + 2891615567872, + 2411255037952, + 3435817336832, + 1745433788416, + 1181205135360, + 1607402389504, + 1490967855104, + 4380756541440, + 14631844184064, + ], + "happiness_index": [ + 6.94, + 7.16, + 6.66, + 7.07, + 6.38, + 6.4, + 7.23, + 7.22, + 5.87, + 5.12, + ], + } + ) + + @pytest.fixture + @patch("pandasai.connectors.sql.create_engine", autospec=True) + def sql_connector(self, create_engine): + # Define your ConnectorConfig instance here + self.config = SQLConnectorConfig( + 
dialect="mysql", + driver="pymysql", + username="your_username", + password="your_password", + host="your_host", + port=443, + database="your_database", + table="your_table", + where=[["column_name", "=", "value"]], + ).dict() + + # Create an instance of SQLConnector + return SQLConnector(self.config) + + @pytest.fixture + @patch("pandasai.connectors.sql.create_engine", autospec=True) + def pgsql_connector(self, create_engine): + # Define your ConnectorConfig instance here + self.config = SQLConnectorConfig( + dialect="pgsql", + driver="pymysql", + username="your_username", + password="your_password", + host="your_host", + port=443, + database="your_database", + table="your_table", + where=[["column_name", "=", "value"]], + ).dict() + + # Create an instance of SQLConnector + return PostgreSQLConnector(self.config) + + @pytest.fixture + def config(self, llm): + return {"llm": llm, "enable_cache": True} + + @pytest.fixture + def context(self, sample_df, config): + return PipelineContext([sample_df], config) + + @pytest.fixture + def logger(self): + return Logger(True, False) + + def test_init(self, context, config): + # Test the initialization of the CodeGenerator + code_generator = ValidatePipelineInput() + assert isinstance(code_generator, ValidatePipelineInput) + + def test_validate_input_with_direct_sql_false_and_non_connector( + self, context, logger + ): + input_validator = ValidatePipelineInput() + + result = input_validator.execute(input="test", context=context, logger=logger) + + assert result.output == "test" + + def test_validate_input_with_direct_sql_true_and_non_connector( + self, sample_df, llm, logger + ): + input_validator = ValidatePipelineInput() + + # context for true config + config = {"llm": llm, "enable_cache": True, "direct_sql": True} + + context = PipelineContext([sample_df], config) + with pytest.raises(InvalidConfigError): + input_validator.execute(input="test", context=context, logger=logger) + + def 
test_validate_input_with_direct_sql_false_and_connector( + self, sample_df, llm, logger, sql_connector + ): + input_validator = ValidatePipelineInput() + + # context for true config + config = {"llm": llm, "enable_cache": True, "direct_sql": False} + + context = PipelineContext([sample_df, sql_connector], config) + result = input_validator.execute(input="test", context=context, logger=logger) + assert isinstance(result, LogicUnitOutput) + assert result.output == "test" + + def test_validate_input_with_direct_sql_true_and_connector( + self, sample_df, llm, logger, sql_connector + ): + input_validator = ValidatePipelineInput() + + # context for true config + config = {"llm": llm, "enable_cache": True, "direct_sql": True} + + context = PipelineContext([sql_connector], config) + result = input_validator.execute(input="test", context=context, logger=logger) + assert isinstance(result, LogicUnitOutput) + assert result.output == "test" + + def test_validate_input_with_direct_sql_true_and_connector_pandasdf( + self, sample_df, llm, logger, sql_connector + ): + input_validator = ValidatePipelineInput() + + # context for true config + config = {"llm": llm, "enable_cache": True, "direct_sql": True} + + context = PipelineContext([sample_df, sql_connector], config) + with pytest.raises(InvalidConfigError): + input_validator.execute(input="test", context=context, logger=logger) + + def test_validate_input_with_direct_sql_true_and_different_type_connector( + self, pgsql_connector, llm, logger, sql_connector + ): + input_validator = ValidatePipelineInput() + + # context for true config + config = {"llm": llm, "enable_cache": True, "direct_sql": True} + + context = PipelineContext([pgsql_connector, sql_connector], config) + with pytest.raises(InvalidConfigError): + input_validator.execute(input="test", context=context, logger=logger) diff --git a/tests/unit_tests/pipelines/test_pipeline.py b/tests/unit_tests/pipelines/test_pipeline.py index ab3b7ae11..16bb93317 100644 --- 
a/tests/unit_tests/pipelines/test_pipeline.py +++ b/tests/unit_tests/pipelines/test_pipeline.py @@ -4,12 +4,12 @@ import pandas as pd import pytest +from pandasai.connectors import BaseConnector, PandasConnector from pandasai.llm.fake import FakeLLM from pandasai.pipelines.base_logic_unit import BaseLogicUnit from pandasai.pipelines.pipeline import Pipeline from pandasai.pipelines.pipeline_context import PipelineContext from pandasai.schemas.df_config import Config -from pandasai.smart_dataframe import SmartDataframe class MockLogicUnit(BaseLogicUnit): @@ -66,8 +66,8 @@ def sample_df(self): ) @pytest.fixture - def smart_dataframe(self, llm, sample_df): - return SmartDataframe(sample_df, config={"llm": llm, "enable_cache": False}) + def dataframe(self, sample_df): + return PandasConnector({"original_df": sample_df}) @pytest.fixture def config(self, llm): @@ -81,23 +81,23 @@ def test_init(self, context, config): # Test the initialization of the Pipeline pipeline = Pipeline(context) assert isinstance(pipeline, Pipeline) - assert pipeline._context._config == Config(**config) + assert pipeline._context.config == Config(**config) assert pipeline._context == context assert pipeline._steps == [] - def test_init_with_smartdfs(self, smart_dataframe, config): + def test_init_with_agent(self, dataframe, config): # Test the initialization of the Pipeline - pipeline = Pipeline([smart_dataframe], config=config) + pipeline = Pipeline([dataframe], config=config) assert isinstance(pipeline, Pipeline) assert len(pipeline._context.dfs) == 1 - assert isinstance(pipeline._context.dfs[0], SmartDataframe) + assert isinstance(pipeline._context.dfs[0], BaseConnector) - def test_init_with_dfs(self, sample_df, config): + def test_init_with_dfs(self, dataframe, config): # Test the initialization of the Pipeline - pipeline = Pipeline([sample_df], config=config) + pipeline = Pipeline([dataframe], config=config) assert isinstance(pipeline, Pipeline) assert len(pipeline._context.dfs) == 1 - assert 
isinstance(pipeline._context.dfs[0], SmartDataframe) + assert isinstance(pipeline._context.dfs[0], BaseConnector) def test_add_step(self, context, config): # Test the add_step method diff --git a/tests/unit_tests/prompts/test_base_prompt.py b/tests/unit_tests/prompts/test_base_prompt.py deleted file mode 100644 index 6ad67c3fd..000000000 --- a/tests/unit_tests/prompts/test_base_prompt.py +++ /dev/null @@ -1,11 +0,0 @@ -"""Unit tests for the base prompt class""" - -import pytest - -from pandasai.prompts import AbstractPrompt - - -class TestBasePrompt: - def test_instantiate_without_template(self): - with pytest.raises(TypeError): - AbstractPrompt() diff --git a/tests/unit_tests/prompts/test_correct_error_prompt.py b/tests/unit_tests/prompts/test_correct_error_prompt.py index 167c534c4..93fb1b993 100644 --- a/tests/unit_tests/prompts/test_correct_error_prompt.py +++ b/tests/unit_tests/prompts/test_correct_error_prompt.py @@ -3,7 +3,9 @@ import pandas as pd -from pandasai import SmartDataframe +from pandasai import Agent +from pandasai.connectors import PandasConnector +from pandasai.helpers.dataframe_serializer import DataframeSerializerType from pandasai.llm.fake import FakeLLM from pandasai.prompts import CorrectErrorPrompt @@ -14,18 +16,14 @@ class TestCorrectErrorPrompt: def test_str_with_args(self): """Test that the __str__ method is implemented""" - llm = FakeLLM("plt.show()") - dfs = [ - SmartDataframe( - pd.DataFrame({}), - config={"llm": llm}, - ) - ] + llm = FakeLLM() + agent = Agent( + dfs=[PandasConnector({"original_df": pd.DataFrame()})], + config={"llm": llm, "dataframe_serializer": DataframeSerializerType.CSV}, + ) prompt = CorrectErrorPrompt( - engine="pandas", code="df.head()", error_returned="Error message" + context=agent.context, code="df.head()", error="Error message" ) - prompt.set_var("dfs", dfs) - prompt.set_var("conversation", "What is the correct code?") prompt_content = prompt.to_string() if sys.platform.startswith("win"): prompt_content = 
prompt_content.replace("\r\n", "\n") @@ -38,7 +36,7 @@ def test_str_with_args(self): The user asked the following question: -What is the correct code? + You generated this python code: df.head() @@ -48,3 +46,27 @@ def test_str_with_args(self): Fix the python code above and return the new python code:""" # noqa: E501 ) + + def test_to_json(self): + """Test that the __str__ method is implemented""" + + llm = FakeLLM() + agent = Agent( + dfs=[PandasConnector({"original_df": pd.DataFrame()})], + config={"llm": llm, "dataframe_serializer": DataframeSerializerType.CSV}, + ) + prompt = CorrectErrorPrompt( + context=agent.context, code="df.head()", error="Error message" + ) + + assert prompt.to_json() == { + "datasets": [{"name": None, "description": None, "head": []}], + "conversation": [], + "system_prompt": None, + "error": { + "code": "df.head()", + "error_trace": "Error message", + "exception_type": "Exception", + }, + "config": {"direct_sql": False}, + } diff --git a/tests/unit_tests/prompts/test_generate_python_code_prompt.py b/tests/unit_tests/prompts/test_generate_python_code_prompt.py index 40dd4fa96..b2b22f456 100644 --- a/tests/unit_tests/prompts/test_generate_python_code_prompt.py +++ b/tests/unit_tests/prompts/test_generate_python_code_prompt.py @@ -1,20 +1,15 @@ """Unit tests for the generate python code prompt class""" + +import os import sys +from unittest.mock import patch import pandas as pd import pytest -from pandasai import SmartDataframe -from pandasai.helpers.output_types import ( - DefaultOutputType, - output_type_factory, - output_types_map, -) -from pandasai.helpers.viz_library_types import ( - MatplotlibVizLibraryType, - viz_lib_map, - viz_lib_type_factory, -) +from pandasai import Agent +from pandasai.connectors import PandasConnector +from pandasai.helpers.dataframe_serializer import DataframeSerializerType from pandasai.llm.fake import FakeLLM from pandasai.prompts import GeneratePythonCodePrompt @@ -23,59 +18,240 @@ class 
TestGeneratePythonCodePrompt: """Unit tests for the generate python code prompt class""" @pytest.mark.parametrize( - "save_charts_path,output_type_hint,viz_library_type_hint", + "output_type,output_type_template", [ - ( - "exports/charts", - DefaultOutputType().template_hint, - MatplotlibVizLibraryType().template_hint, - ), - ( - "custom/dir/for/charts", - DefaultOutputType().template_hint, - MatplotlibVizLibraryType().template_hint, - ), *[ ( - "exports/charts", - output_type_factory(type_).template_hint, - viz_lib_type_factory(viz_type_).template_hint, - ) - for type_ in output_types_map - for viz_type_ in viz_lib_map - ], + None, + """type (possible values "string", "number", "dataframe", "plot"). Examples: { "type": "string", "value": f"The highest salary is {highest_salary}." } or { "type": "number", "value": 125 } or { "type": "dataframe", "value": pd.DataFrame({...}) } or { "type": "plot", "value": "temp_chart.png" }""", + ), + ( + "number", + """type (must be "number"), value must int. Example: { "type": "number", "value": 125 }""", + ), + ( + "dataframe", + """type (must be "dataframe"), value must be pd.DataFrame or pd.Series. Example: { "type": "dataframe", "value": pd.DataFrame({...}) }""", + ), + ( + "plot", + """type (must be "plot"), value must be string. Example: { "type": "plot", "value": "temp_chart.png" }""", + ), + ( + "string", + """type (must be "string"), value must be string. Example: { "type": "string", "value": f"The highest salary is {highest_salary}." }""", + ), + ] ], ) - def test_str_with_args( - self, save_charts_path, output_type_hint, viz_library_type_hint + def test_str_with_args(self, output_type, output_type_template): + """Test casting of prompt to string and interpolation of context. 
+ + Args: + output_type (str): output type + output_type_template (str): output type template + + Returns: + None + """ + + os.environ["PANDASAI_API_URL"] = "" + os.environ["PANDASAI_API_KEY"] = "" + + llm = FakeLLM() + agent = Agent( + PandasConnector({"original_df": pd.DataFrame({"a": [1], "b": [4]})}), + config={"llm": llm, "dataframe_serializer": DataframeSerializerType.CSV}, + ) + prompt = GeneratePythonCodePrompt( + context=agent.context, + output_type=output_type, + ) + + expected_prompt_content = f""" +dfs[0]:1x2 +a,b +1,4 + + + + +Update this initial code: +```python +# TODO: import the required dependencies +import pandas as pd + +# Write code here + +# Declare result var: +{output_type_template} + +``` + + + + + +Variable `dfs: list[pd.DataFrame]` is already declared. + +At the end, declare "result" variable as a dictionary of type and value. + + +Generate python code and return full updated code:""" # noqa E501 + actual_prompt_content = prompt.to_string() + if sys.platform.startswith("win"): + actual_prompt_content = actual_prompt_content.replace("\r\n", "\n") + assert actual_prompt_content == expected_prompt_content + + @pytest.mark.parametrize( + "output_type,output_type_template", + [ + *[ + ( + None, + """type (possible values "string", "number", "dataframe", "plot"). Examples: { "type": "string", "value": f"The highest salary is {highest_salary}." } or { "type": "number", "value": 125 } or { "type": "dataframe", "value": pd.DataFrame({...}) } or { "type": "plot", "value": "temp_chart.png" }""", + ), + ( + "number", + """type (must be "number"), value must int. Example: { "type": "number", "value": 125 }""", + ), + ( + "dataframe", + """type (must be "dataframe"), value must be pd.DataFrame or pd.Series. Example: { "type": "dataframe", "value": pd.DataFrame({...}) }""", + ), + ( + "plot", + """type (must be "plot"), value must be string. 
Example: { "type": "plot", "value": "temp_chart.png" }""", + ), + ( + "string", + """type (must be "string"), value must be string. Example: { "type": "string", "value": f"The highest salary is {highest_salary}." }""", + ), + ] + ], + ) + @patch("pandasai.vectorstores.bamboo_vectorstore.BambooVectorStore") + def test_str_with_train_qa(self, chromadb_mock, output_type, output_type_template): + """Test casting of prompt to string and interpolation of context. + + Args: + output_type (str): output type + output_type_template (str): output type template + + Returns: + None + """ + + os.environ["PANDASAI_API_URL"] = "SERVER_URL" + os.environ["PANDASAI_API_KEY"] = "API_KEY" + + chromadb_instance = chromadb_mock.return_value + chromadb_instance.get_relevant_qa_documents.return_value = [["query1"]] + llm = FakeLLM() + agent = Agent( + PandasConnector({"original_df": pd.DataFrame({"a": [1], "b": [4]})}), + config={"llm": llm, "dataframe_serializer": DataframeSerializerType.CSV}, + ) + agent.train(["query1"], ["code1"]) + prompt = GeneratePythonCodePrompt( + context=agent.context, + output_type=output_type, + ) + + expected_prompt_content = f""" +dfs[0]:1x2 +a,b +1,4 + + + + +Update this initial code: +```python +# TODO: import the required dependencies +import pandas as pd + +# Write code here + +# Declare result var: +{output_type_template} + +``` + + +You can utilize these examples as a reference for generating code. + +['query1'] + + + + + +Variable `dfs: list[pd.DataFrame]` is already declared. + +At the end, declare "result" variable as a dictionary of type and value. 
+ + +Generate python code and return full updated code:""" # noqa E501 + actual_prompt_content = prompt.to_string() + if sys.platform.startswith("win"): + actual_prompt_content = actual_prompt_content.replace("\r\n", "\n") + + assert actual_prompt_content == expected_prompt_content + + @pytest.mark.parametrize( + "output_type,output_type_template", + [ + *[ + ( + None, + """type (possible values "string", "number", "dataframe", "plot"). Examples: { "type": "string", "value": f"The highest salary is {highest_salary}." } or { "type": "number", "value": 125 } or { "type": "dataframe", "value": pd.DataFrame({...}) } or { "type": "plot", "value": "temp_chart.png" }""", + ), + ( + "number", + """type (must be "number"), value must int. Example: { "type": "number", "value": 125 }""", + ), + ( + "dataframe", + """type (must be "dataframe"), value must be pd.DataFrame or pd.Series. Example: { "type": "dataframe", "value": pd.DataFrame({...}) }""", + ), + ( + "plot", + """type (must be "plot"), value must be string. Example: { "type": "plot", "value": "temp_chart.png" }""", + ), + ( + "string", + """type (must be "string"), value must be string. Example: { "type": "string", "value": f"The highest salary is {highest_salary}." }""", + ), + ] + ], + ) + @patch("pandasai.vectorstores.bamboo_vectorstore.BambooVectorStore") + def test_str_with_train_docs( + self, chromadb_mock, output_type, output_type_template ): """Test casting of prompt to string and interpolation of context. 
- Parameterized for the following cases: - * `save_charts_path` is "exports/charts", `output_type_hint` is default, - `viz_library_type_hint` is default - * `save_charts_path` is "custom/dir/for/charts", `output_type_hint` - is default, `viz_library_type_hint` is default - * `save_charts_path` is "exports/charts", `output_type_hint` any of - possible types in `pandasai.helpers.output_types.output_types_map`, - `viz_library_type_hint` any of - possible types in `pandasai.helpers.viz_library_types.viz_library_types_map` + Args: + output_type (str): output type + output_type_template (str): output type template + + Returns: + None """ - llm = FakeLLM("plt.show()") - dfs = [ - SmartDataframe( - pd.DataFrame({"a": [1], "b": [4]}), - config={"llm": llm}, - ) - ] - prompt = GeneratePythonCodePrompt() - prompt.set_var("dfs", dfs) - prompt.set_var("last_message", "Q: Question") - prompt.set_var("save_charts_path", save_charts_path) - prompt.set_var("output_type_hint", output_type_hint) - prompt.set_var("viz_library_type", viz_library_type_hint) - prompt.set_var("skills", "") + chromadb_instance = chromadb_mock.return_value + chromadb_instance.get_relevant_docs_documents.return_value = [["query1"]] + llm = FakeLLM() + agent = Agent( + PandasConnector({"original_df": pd.DataFrame({"a": [1], "b": [4]})}), + config={"llm": llm, "dataframe_serializer": DataframeSerializerType.CSV}, + ) + agent.train(docs=["document1"]) + prompt = GeneratePythonCodePrompt( + context=agent.context, + output_type=output_type, + ) expected_prompt_content = f""" dfs[0]:1x2 @@ -85,6 +261,116 @@ def test_str_with_args( +Update this initial code: +```python +# TODO: import the required dependencies +import pandas as pd + +# Write code here + +# Declare result var: +{output_type_template} + +``` + + + + + +Here are additional documents for reference. Feel free to use them to answer. +['query1'] + + + +Variable `dfs: list[pd.DataFrame]` is already declared. 
+ +At the end, declare "result" variable as a dictionary of type and value. + + +Generate python code and return full updated code:""" # noqa E501 + actual_prompt_content = prompt.to_string() + if sys.platform.startswith("win"): + actual_prompt_content = actual_prompt_content.replace("\r\n", "\n") + + assert actual_prompt_content == expected_prompt_content + + @pytest.mark.parametrize( + "output_type,output_type_template", + [ + *[ + ( + None, + """type (possible values "string", "number", "dataframe", "plot"). Examples: { "type": "string", "value": f"The highest salary is {highest_salary}." } or { "type": "number", "value": 125 } or { "type": "dataframe", "value": pd.DataFrame({...}) } or { "type": "plot", "value": "temp_chart.png" }""", + ), + ( + "number", + """type (must be "number"), value must int. Example: { "type": "number", "value": 125 }""", + ), + ( + "dataframe", + """type (must be "dataframe"), value must be pd.DataFrame or pd.Series. Example: { "type": "dataframe", "value": pd.DataFrame({...}) }""", + ), + ( + "plot", + """type (must be "plot"), value must be string. Example: { "type": "plot", "value": "temp_chart.png" }""", + ), + ( + "string", + """type (must be "string"), value must be string. Example: { "type": "string", "value": f"The highest salary is {highest_salary}." }""", + ), + ] + ], + ) + @patch("pandasai.vectorstores.bamboo_vectorstore.BambooVectorStore") + def test_str_with_train_docs_and_qa( + self, chromadb_mock, output_type, output_type_template + ): + """Test casting of prompt to string and interpolation of context. 
+ + Args: + output_type (str): output type + output_type_template (str): output type template + + Returns: + None + """ + + os.environ["PANDASAI_API_URL"] = "SERVER_URL" + os.environ["PANDASAI_API_KEY"] = "API_KEY" + + chromadb_instance = chromadb_mock.return_value + chromadb_instance.get_relevant_docs_documents.return_value = [["documents1"]] + chromadb_instance.get_relevant_qa_documents.return_value = [["query1"]] + llm = FakeLLM() + agent = Agent( + PandasConnector({"original_df": pd.DataFrame({"a": [1], "b": [4]})}), + config={"llm": llm}, + ) + agent.train(queries=["query1"], codes=["code1"], docs=["document1"]) + prompt = GeneratePythonCodePrompt( + context=agent.context, + output_type=output_type, + ) + + expected_prompt_content = f"""dfs[0]: + name: null + description: null + type: pd.DataFrame + rows: 1 + columns: 2 + schema: + fields: + - name: a + type: int64 + samples: + - 1 + - name: b + type: int64 + samples: + - 4 + + + Update this initial code: ```python @@ -93,37 +379,64 @@ def test_str_with_args( # Write code here -# Declare result var: {output_type_hint} +# Declare result var: +{output_type_template} + ``` -Q: Question + +You can utilize these examples as a reference for generating code. + +['query1'] + +Here are additional documents for reference. Feel free to use them to answer. +['documents1'] + + + Variable `dfs: list[pd.DataFrame]` is already declared. At the end, declare "result" variable as a dictionary of type and value. -{viz_library_type_hint} Generate python code and return full updated code:""" # noqa E501 actual_prompt_content = prompt.to_string() if sys.platform.startswith("win"): actual_prompt_content = actual_prompt_content.replace("\r\n", "\n") + assert actual_prompt_content == expected_prompt_content - def test_custom_instructions(self): - custom_instructions = """Analyze the data. -1. Load: Load the data from a file or database -2. Prepare: Preprocessing and cleaning data if necessary -3. 
Process: Manipulating data for analysis (grouping, filtering, aggregating, etc.) -4. Analyze: Conducting the actual analysis (if the user asks to plot a chart you must save it as an image in temp_chart.png and not show the chart.)""" # noqa: E501 - - prompt = GeneratePythonCodePrompt(custom_instructions=custom_instructions) - actual_instructions = prompt._args["instructions"] - - assert ( - actual_instructions - == """Analyze the data. -1. Load: Load the data from a file or database -2. Prepare: Preprocessing and cleaning data if necessary -3. Process: Manipulating data for analysis (grouping, filtering, aggregating, etc.) -4. Analyze: Conducting the actual analysis (if the user asks to plot a chart you must save it as an image in temp_chart.png and not show the chart.)""" # noqa: E501 + @patch("pandasai.vectorstores.bamboo_vectorstore.BambooVectorStore") + def test_str_geenerate_code_prompt_to_json(self, chromadb_mock): + """Test casting of prompt to string and interpolation of context. 
+ + Args: + output_type (str): output type + output_type_template (str): output type template + + Returns: + None + """ + + chromadb_instance = chromadb_mock.return_value + chromadb_instance.get_relevant_docs_documents.return_value = [["documents1"]] + chromadb_instance.get_relevant_qa_documents.return_value = [["query1"]] + llm = FakeLLM() + agent = Agent( + PandasConnector({"original_df": pd.DataFrame({"a": [1], "b": [4]})}), + config={"llm": llm}, + ) + agent.train(queries=["query1"], codes=["code1"], docs=["document1"]) + prompt = GeneratePythonCodePrompt( + context=agent.context, viz_lib="", output_type=None ) + print(prompt.to_json()) + + assert prompt.to_json() == { + "datasets": [ + {"name": None, "description": None, "head": [{"a": 1, "b": 4}]} + ], + "conversation": [], + "system_prompt": None, + "config": {"direct_sql": False, "viz_lib": "", "output_type": None}, + } diff --git a/tests/unit_tests/prompts/test_sql_prompt.py b/tests/unit_tests/prompts/test_sql_prompt.py index fac63da56..8737c9889 100644 --- a/tests/unit_tests/prompts/test_sql_prompt.py +++ b/tests/unit_tests/prompts/test_sql_prompt.py @@ -1,117 +1,111 @@ """Unit tests for the correct error prompt class""" +import os import sys import pandas as pd import pytest -from pandasai import SmartDataframe -from pandasai.helpers.output_types import ( - DefaultOutputType, - output_type_factory, - output_types_map, -) -from pandasai.helpers.viz_library_types import ( - MatplotlibVizLibraryType, - viz_lib_map, - viz_lib_type_factory, -) +from pandasai import Agent from pandasai.llm.fake import FakeLLM -from pandasai.prompts.direct_sql_prompt import DirectSQLPrompt +from pandasai.prompts.generate_python_code_with_sql import ( + GeneratePythonCodeWithSQLPrompt, +) -class TestDirectSqlPrompt: +class TestGeneratePythonCodeWithSQLPrompt: """Unit tests for the correct error prompt class""" @pytest.mark.parametrize( - "save_charts_path,output_type_hint,viz_library_type_hint", + 
"output_type,output_type_template", [ - ( - "exports/charts", - DefaultOutputType().template_hint, - MatplotlibVizLibraryType().template_hint, - ), - ( - "custom/dir/for/charts", - DefaultOutputType().template_hint, - MatplotlibVizLibraryType().template_hint, - ), *[ ( - "exports/charts", - output_type_factory(type_).template_hint, - viz_lib_type_factory(viz_type_).template_hint, - ) - for type_ in output_types_map - for viz_type_ in viz_lib_map - ], + "", + """type (possible values "string", "number", "dataframe", "plot"). Examples: { "type": "string", "value": f"The highest salary is {highest_salary}." } or { "type": "number", "value": 125 } or { "type": "dataframe", "value": pd.DataFrame({...}) } or { "type": "plot", "value": "temp_chart.png" }""", + ), + ( + "number", + """type (must be "number"), value must int. Example: { "type": "number", "value": 125 }""", + ), + ( + "dataframe", + """type (must be "dataframe"), value must be pd.DataFrame or pd.Series. Example: { "type": "dataframe", "value": pd.DataFrame({...}) }""", + ), + ( + "plot", + """type (must be "plot"), value must be string. Example: { "type": "plot", "value": "temp_chart.png" }""", + ), + ( + "string", + """type (must be "string"), value must be string. Example: { "type": "string", "value": f"The highest salary is {highest_salary}." 
}""", + ), + ] ], ) - def test_direct_sql_prompt_with_params( - self, save_charts_path, output_type_hint, viz_library_type_hint - ): + def test_str_with_args(self, output_type, output_type_template): """Test that the __str__ method is implemented""" - llm = FakeLLM("plt.show()") - dfs = [ - SmartDataframe( - pd.DataFrame({}), - config={"llm": llm}, - ) - ] - - prompt = DirectSQLPrompt(tables=dfs) - prompt.set_var("dfs", dfs) - prompt.set_var("conversation", "What is the correct code?") - prompt.set_var("output_type_hint", output_type_hint) - prompt.set_var("save_charts_path", save_charts_path) - prompt.set_var("viz_library_type", viz_library_type_hint) - prompt.set_var( - "current_code", - """# TODO: import the required dependencies -import pandas as pd""", - ) - prompt.set_var("skills", "") - prompt.set_var( - "reasoning", - """Take a deep breath and reason step-by-step. Act as a senior data analyst. -In the answer, you must never write the "technical" names of the tables. -Based on the last message in the conversation: - -- return the updated analyze_data function wrapped within `python `""", + os.environ["PANDASAI_API_URL"] = "" + os.environ["PANDASAI_API_KEY"] = "" + + llm = FakeLLM() + agent = Agent(pd.DataFrame(), config={"llm": llm}) + prompt = GeneratePythonCodeWithSQLPrompt( + context=agent.context, + output_type=output_type, ) prompt_content = prompt.to_string() if sys.platform.startswith("win"): prompt_content = prompt_content.replace("\r\n", "\n") + print(prompt_content) + assert ( prompt_content == f''' - - +
+dfs[0]: + name: null + description: null + type: pd.DataFrame + rows: 0 + columns: 0 + schema: + fields: []
+ You can call the following functions that have been pre-defined for you: + def execute_sql_query(sql_query: str) -> pd.Dataframe """This method connects to the database, executes the sql query and returns the dataframe""" - - Update this initial code: ```python # TODO: import the required dependencies import pandas as pd + +# Write code here + +# Declare result var: +{output_type_template} + ``` + +Variable `dfs: list[pd.DataFrame]` is already declared. + At the end, declare "result" variable as a dictionary of type and value. -{viz_library_type_hint} -Generate python code and return full updated code:''' # noqa: E501 +Generate python code and return full updated code: + +### Note: Use only relevant table for query and do aggregation, sorting, joins and grouby through sql query''' # noqa: E501 ) diff --git a/tests/unit_tests/skills/test_skills.py b/tests/unit_tests/skills/test_skills.py index 007b5043f..d0e2bb347 100644 --- a/tests/unit_tests/skills/test_skills.py +++ b/tests/unit_tests/skills/test_skills.py @@ -5,12 +5,11 @@ import pandas as pd import pytest -from pandasai.agent import Agent +from pandasai import Agent from pandasai.helpers.code_manager import CodeExecutionContext, CodeManager from pandasai.helpers.skills_manager import SkillsManager from pandasai.llm.fake import FakeLLM from pandasai.skills import Skill, skill -from pandasai.smart_dataframe import SmartDataframe class TestSkills: @@ -62,12 +61,12 @@ def sample_df(self): ) @pytest.fixture - def smart_dataframe(self, llm, sample_df): - return SmartDataframe(sample_df, config={"llm": llm, "enable_cache": False}) - - @pytest.fixture - def code_manager(self, smart_dataframe: SmartDataframe): - return smart_dataframe.lake._code_manager + def code_manager(self, agent: Agent): + return CodeManager( + agent.context.dfs, + config=agent.context.config, + logger=agent.logger, + ) @pytest.fixture def exec_context(self) -> MagicMock: @@ -165,12 +164,45 @@ def test_prompt_display(self): ) # Test 
prompt_display method when no skills exist - skills_manager._skills = [] + skills_manager.skills = [] prompt = skills_manager.prompt_display() assert prompt is None @patch("pandasai.skills.inspect.signature", return_value="(a, b, c)") - def test_skill_decorator_test_code(self, llm): + def test_skill_decorator(self, _mock_inspect_signature): + # Define skills using the decorator + @skill + def skill_a(*args, **kwargs): + """ + Test skill A + Args: + arg(str) + """ + return "SkillA Result" + + @skill + def skill_b(*args, **kwargs): + """ + Test skill B + Args: + arg(str) + """ + return "SkillB Result" + + # Test the wrapped functions + assert skill_a() == "SkillA Result" + assert skill_b() == "SkillB Result" + + # Test the additional attributes added by the decorator + assert skill_a.name == "skill_a" + assert skill_b.name == "skill_b" + + # check the function definition + assert skill_a._signature == "def skill_a(a, b, c):" + assert skill_b._signature == "def skill_b(a, b, c):" + + @patch("pandasai.skills.inspect.signature", return_value="(a, b, c)") + def test_skill_decorator_test_code(self, _mock_inspect_signature): # Define skills using the decorator @skill def plot_salaries(*args, **kwargs): @@ -200,32 +232,15 @@ def skill_a(*args, **kwargs): ) agent.add_skills(skill_a) - assert len(agent._lake._skills.skills) == 1 + assert len(agent.context.skills_manager.skills) == 1 - agent._lake._skills._skills = [] + agent.context.skills_manager.skills = [] agent.add_skills(skill_a, skill_b) - assert len(agent._lake._skills.skills) == 2 - - def test_add_skills_with_smartDataframe(self, smart_dataframe: SmartDataframe): - @skill - def skill_a(*args, **kwargs): - """Skill A""" - return "SkillA Result" - - skill_b = Skill.from_function( - func=lambda _: "SkillB Result", description="Skill B" - ) - - smart_dataframe.add_skills(skill_a) - assert len(smart_dataframe._lake._skills.skills) == 1 - - smart_dataframe._lake._skills._skills = [] - smart_dataframe.add_skills(skill_a, 
skill_b) - assert len(smart_dataframe._lake._skills.skills) == 2 + assert len(agent.context.skills_manager.skills) == 2 def test_run_prompt(self, llm): df = pd.DataFrame({"country": []}) - df = SmartDataframe(df, config={"llm": llm, "enable_cache": False}) + df = Agent([df], config={"llm": llm, "enable_cache": False}) function_def = """ @@ -251,6 +266,7 @@ def plot_salaries(merged_df: pd.DataFrame): df.chat("How many countries are in the dataframe?") last_prompt = df.last_prompt + assert function_def in last_prompt def test_run_prompt_agent(self, agent): @@ -277,14 +293,14 @@ def plot_salaries(merged_df: pd.DataFrame): agent.add_skills(plot_salaries) agent.chat("How many countries are in the dataframe?") - last_prompt = agent._lake.last_prompt + last_prompt = agent.last_prompt assert function_def in last_prompt def test_run_prompt_without_skills(self, agent): agent.chat("How many countries are in the dataframe?") - last_prompt = agent._lake.last_prompt + last_prompt = agent.last_prompt assert "" not in last_prompt assert "" not in last_prompt @@ -299,9 +315,9 @@ def test_code_exec_with_skills_no_use( code = """result = {'type': 'number', 'value': 1 + 1}""" skill1 = MagicMock() skill1.name = "SkillA" - exec_context._skills_manager._skills = [skill1] + exec_context.skills_manager.skills = [skill1] code_manager.execute_code(code, exec_context) - assert len(exec_context._skills_manager.used_skills) == 0 + assert len(exec_context.skills_manager.used_skills) == 0 def test_code_exec_with_skills(self, code_manager: CodeManager): code = """plot_salaries() @@ -317,6 +333,6 @@ def plot_salaries() -> str: exec_context = CodeExecutionContext(uuid.uuid4(), sm) result = code_manager.execute_code(code, exec_context) - assert len(exec_context._skills_manager.used_skills) == 1 - assert exec_context._skills_manager.used_skills[0] == "plot_salaries" + assert len(exec_context.skills_manager.used_skills) == 1 + assert exec_context.skills_manager.used_skills[0] == "plot_salaries" assert 
result == {"type": "number", "value": 1 + 1} diff --git a/tests/unit_tests/test_codemanager.py b/tests/unit_tests/test_codemanager.py index 9c21455aa..8607f6d75 100644 --- a/tests/unit_tests/test_codemanager.py +++ b/tests/unit_tests/test_codemanager.py @@ -1,24 +1,31 @@ """Unit tests for the CodeManager class""" import ast +import os import uuid from typing import Optional -from unittest.mock import MagicMock, Mock, patch +from unittest.mock import MagicMock, patch import pandas as pd import pytest -from pandasai.connectors.base import SQLConnectorConfig -from pandasai.connectors.sql import PostgreSQLConnector, SQLConnector +from pandasai import Agent +from pandasai.connectors.sql import ( + PostgreSQLConnector, + SQLConnector, + SQLConnectorConfig, +) from pandasai.exceptions import ( BadImportError, + ExecuteSQLQueryNotUsed, InvalidConfigError, MaliciousQueryError, NoCodeFoundError, ) from pandasai.helpers.code_manager import CodeExecutionContext, CodeManager +from pandasai.helpers.logger import Logger from pandasai.helpers.skills_manager import SkillsManager from pandasai.llm.fake import FakeLLM -from pandasai.smart_dataframe import SmartDataframe +from pandasai.schemas.df_config import Config class TestCodeManager: @@ -72,12 +79,35 @@ def sample_df(self): ) @pytest.fixture - def smart_dataframe(self, llm, sample_df): - return SmartDataframe(sample_df, config={"llm": llm, "enable_cache": False}) + def logger(self): + return Logger() @pytest.fixture - def code_manager(self, smart_dataframe: SmartDataframe): - return smart_dataframe.lake._code_manager + def config_with_direct_sql(self): + return Config( + llm=FakeLLM(output=""), + enable_cache=False, + direct_sql=True, + ) + + @pytest.fixture + def agent(self, llm, sample_df): + return Agent([sample_df], config={"llm": llm, "enable_cache": False}) + + @pytest.fixture + def agent_with_connector(self, llm, pgsql_connector: PostgreSQLConnector): + return Agent( + [pgsql_connector], + config={"llm": llm, 
"enable_cache": False, "direct_sql": True}, + ) + + @pytest.fixture + def code_manager(self, agent: Agent): + return CodeManager( + dfs=agent.context.dfs, + config=agent.context.config, + logger=agent.logger, + ) @pytest.fixture def exec_context(self) -> MagicMock: @@ -119,7 +149,7 @@ def pgsql_connector(self, create_engine): ).dict() # Create an instance of SQLConnector - return PostgreSQLConnector(self.config) + return PostgreSQLConnector(self.config, name="your_table") def test_run_code_for_calculations( self, code_manager: CodeManager, exec_context: MagicMock @@ -203,21 +233,22 @@ def test_remove_dfs_overwrites( == """print(dfs)""" ) - def test_exception_handling( - self, smart_dataframe: SmartDataframe, code_manager: CodeManager - ): - code_manager.execute_code = Mock( - side_effect=NoCodeFoundError("No code found in the answer.") - ) - code_manager.execute_code.__name__ = "execute_code" + @patch( + "pandasai.pipelines.chat.code_execution.CodeManager.execute_code", + autospec=True, + ) + def test_exception_handling(self, mock_execute_code: MagicMock, agent: Agent): + os.environ["PANDASAI_API_URL"] = "" + os.environ["PANDASAI_API_KEY"] = "" - result = smart_dataframe.chat("How many countries are in the dataframe?") + mock_execute_code.side_effect = NoCodeFoundError("No code found in the answer.") + result = agent.chat("How many countries are in the dataframe?") assert result == ( "Unfortunately, I was not able to answer your question, " "because of the following error:\n" "\nNo code found in the answer.\n" ) - assert smart_dataframe.last_error == "No code found in the answer." + assert agent.last_error == "No code found in the answer." 
def test_custom_whitelisted_dependencies( self, code_manager: CodeManager, llm, exec_context: MagicMock @@ -237,7 +268,7 @@ def test_custom_whitelisted_dependencies( == """my_custom_library.do_something()""" ) - def test_get_environment(self, code_manager: CodeManager, smart_dataframe): + def test_get_environment(self, code_manager: CodeManager): code_manager._additional_dependencies = [ {"name": "pyplot", "alias": "plt", "module": "matplotlib"}, {"name": "numpy", "alias": "np", "module": "numpy"}, @@ -312,87 +343,6 @@ def test_get_environment(self, code_manager: CodeManager, smart_dataframe): "__name__": "__main__", } - @pytest.mark.parametrize( - "df_name, code", - [ - ( - "df", - """ -df = dfs[0] -filtered_df = df.filter( - (pl.col('loan_status') == 'PAIDOFF') & (pl.col('Gender') == 'male') -) -count = filtered_df.shape[0] -result = {'type': 'number', 'value': count} - """, - ), - ( - "foobar", - """ -foobar = dfs[0] -filtered_df = foobar.filter( - (pl.col('loan_status') == 'PAIDOFF') & (pl.col('Gender') == 'male') -) -count = filtered_df.shape[0] -result = {'type': 'number', 'value': count} - """, - ), - ], - ) - def test_extract_filters_polars(self, df_name, code, code_manager: CodeManager): - filters = code_manager._extract_filters(code) - assert isinstance(filters, dict) - assert "dfs[0]" in filters - assert isinstance(filters["dfs[0]"], list) - assert len(filters["dfs[0]"]) == 2 - - assert filters["dfs[0]"][0] == ("loan_status", "=", "PAIDOFF") - assert filters["dfs[0]"][1] == ("Gender", "=", "male") - - def test_extract_filters_polars_multiple_df(self, code_manager: CodeManager): - code = """ -df = dfs[0] -filtered_paid_df_male = df.filter( - (pl.col('loan_status') == 'PAIDOFF') & (pl.col('Gender') == 'male') -) -num_loans_paid_off_male = len(filtered_paid_df) - -df = dfs[1] -filtered_pend_df_male = df.filter( - (pl.col('loan_status') == 'PENDING') & (pl.col('Gender') == 'male') -) -num_loans_pending_male = len(filtered_pend_df) - -df = dfs[2] 
-filtered_paid_df_female = df.filter( - (pl.col('loan_status') == 'PAIDOFF') & (pl.col('Gender') == 'female') -) -num_loans_paid_off_female = len(filtered_pend_df) - -value = num_loans_paid_off + num_loans_pending + num_loans_paid_off_female -result = { - 'type': 'number', - 'value': value -} -""" - filters = code_manager._extract_filters(code) - assert isinstance(filters, dict) - assert "dfs[0]" in filters - assert "dfs[1]" in filters - assert "dfs[2]" in filters - assert isinstance(filters["dfs[0]"], list) - assert len(filters["dfs[0]"]) == 2 - assert len(filters["dfs[1]"]) == 2 - - assert filters["dfs[0]"][0] == ("loan_status", "=", "PAIDOFF") - assert filters["dfs[0]"][1] == ("Gender", "=", "male") - - assert filters["dfs[1]"][0] == ("loan_status", "=", "PENDING") - assert filters["dfs[1]"][1] == ("Gender", "=", "male") - - assert filters["dfs[2]"][0] == ("loan_status", "=", "PAIDOFF") - assert filters["dfs[2]"][1] == ("Gender", "=", "female") - @pytest.mark.parametrize("df_name", ["df", "foobar"]) def test_extract_filters_col_index(self, df_name, code_manager: CodeManager): code = f""" @@ -414,45 +364,6 @@ def test_extract_filters_col_index(self, df_name, code_manager: CodeManager): assert filters["dfs[0]"][0] == ("loan_status", "=", "PAIDOFF") assert filters["dfs[0]"][1] == ("Gender", "=", "male") - @pytest.mark.parametrize( - "df_name, code", - [ - ( - "df", - """ -df = dfs[0] -filtered_df = df.filter( - (pl.col('loan_status') == 'PAIDOFF') & (pl.col('Gender') == 'male') -) -count = filtered_df.shape[0] -result = {'type': 'number', 'value': count} - """, - ), - ( - "foobar", - """ -foobar = dfs[0] -filtered_df = foobar[( - foobar['loan_status'] == 'PAIDOFF' -) & (df['Gender'] == 'male')] -num_loans = len(filtered_df) -result = {'type': 'number', 'value': num_loans} - """, - ), - ], - ) - def test_extract_filters_col_index_non_default_name( - self, df_name, code, code_manager: CodeManager - ): - filters = code_manager._extract_filters(code) - assert 
isinstance(filters, dict) - assert "dfs[0]" in filters - assert isinstance(filters["dfs[0]"], list) - assert len(filters["dfs[0]"]) == 2 - - assert filters["dfs[0]"][0] == ("loan_status", "=", "PAIDOFF") - assert filters["dfs[0]"][1] == ("Gender", "=", "male") - def test_extract_filters_col_index_multiple_df(self, code_manager: CodeManager): code = """ df = dfs[0] @@ -504,25 +415,17 @@ def test_validate_true_direct_sql_with_two_different_connector( # raise exception when two different connector with pytest.raises(InvalidConfigError): code_manager._config.direct_sql = True - df1 = SmartDataframe( - sql_connector, - config={"llm": FakeLLM(output="")}, - ) - df2 = SmartDataframe( - pgsql_connector, - config={"llm": FakeLLM(output="")}, - ) - code_manager._validate_direct_sql([df1, df2]) + code_manager._validate_direct_sql([sql_connector, pgsql_connector]) def test_clean_code_direct_sql_code( - self, pgsql_connector: PostgreSQLConnector, exec_context: MagicMock + self, exec_context: MagicMock, agent_with_connector: Agent ): """Test that the direct SQL function definition is removed when 'direct_sql' is True""" - df = SmartDataframe( - pgsql_connector, - config={"llm": FakeLLM(output=""), "direct_sql": True}, + code_manager = CodeManager( + dfs=agent_with_connector.context.dfs, + config=agent_with_connector.context.config, + logger=agent_with_connector.logger, ) - code_manager = df.lake._code_manager safe_code = """ import numpy as np def execute_sql_query(sql_query: str) -> pd.DataFrame: @@ -531,19 +434,17 @@ def execute_sql_query(sql_query: str) -> pd.DataFrame: # return the result as a dataframe return pd.DataFrame() np.array() +execute_sql_query() """ - assert code_manager._clean_code(safe_code, exec_context) == "np.array()" + assert ( + code_manager._clean_code(safe_code, exec_context) + == "np.array()\nexecute_sql_query()" + ) def test_clean_code_direct_sql_code_false( - self, pgsql_connector: PostgreSQLConnector, exec_context: MagicMock + self, exec_context: 
MagicMock, code_manager ): """Test that the direct SQL function definition is removed when 'direct_sql' is False""" - df = SmartDataframe( - pgsql_connector, - config={"llm": FakeLLM(output=""), "direct_sql": False}, - ) - code_manager = df.lake._code_manager - safe_code = """ import numpy as np def execute_sql_query(sql_query: str) -> pd.DataFrame: @@ -577,7 +478,7 @@ def test_check_is_query_using_relevant_table_valid_query( mock_node = ast.parse("sql_query = 'SELECT * FROM allowed_table'").body[0] class MockObject: - table_name = "allowed_table" + name = "allowed_table" code_manager._dfs = [MockObject()] @@ -596,7 +497,7 @@ class MockObject: table_name = "allowed_table" def __init__(self, table_name): - self.table_name = table_name + self.name = table_name code_manager._dfs = [MockObject("table1"), MockObject("table2")] @@ -610,7 +511,7 @@ def test_check_is_query_using_relevant_table_unknown_table( mock_node = ast.parse("sql_query = 'SELECT * FROM unknown_table'").body[0] class MockObject: - table_name = "allowed_table" + name = "allowed_table" code_manager._dfs = [MockObject()] @@ -629,7 +530,7 @@ class MockObject: table_name = "allowed_table" def __init__(self, table_name): - self.table_name = table_name + self.name = table_name code_manager._dfs = [MockObject("table1"), MockObject("unknown_table")] @@ -638,26 +539,59 @@ def __init__(self, table_name): assert len(irrelevant_tables) == 1 def test_clean_code_using_correct_sql_table( - self, pgsql_connector: PostgreSQLConnector, exec_context: MagicMock + self, + pgsql_connector: PostgreSQLConnector, + exec_context: MagicMock, + config_with_direct_sql: Config, + logger: Logger, ): - """Test that the direct SQL function definition is removed when 'direct_sql' is False""" - df = SmartDataframe( - pgsql_connector, - config={"llm": FakeLLM(output=""), "direct_sql": True}, + """Test that the correct sql table""" + code_manager = CodeManager([pgsql_connector], config_with_direct_sql, logger) + safe_code = ( + 
"""sql_query = 'SELECT * FROM your_table'\nexecute_sql_query(sql_query)""" ) - code_manager = df.lake._code_manager + assert code_manager._clean_code(safe_code, exec_context) == safe_code + + def test_clean_code_with_no_execute_sql_query_usage( + self, + pgsql_connector: PostgreSQLConnector, + exec_context: MagicMock, + config_with_direct_sql: Config, + logger: Logger, + ): + """Test that the correct sql table""" + code_manager = CodeManager([pgsql_connector], config_with_direct_sql, logger) safe_code = """sql_query = 'SELECT * FROM your_table'""" + with pytest.raises(ExecuteSQLQueryNotUsed) as excinfo: + code_manager._clean_code(safe_code, exec_context) + assert str(excinfo.value) == ( + "For Direct SQL set to true, execute_sql_query function must be used. Generating Error Prompt!!!" + ) + + def test_clean_code_with_no_execute_sql_query_usage_script( + self, + pgsql_connector: PostgreSQLConnector, + exec_context: MagicMock, + config_with_direct_sql: Config, + logger: Logger, + ): + """Test that the correct sql table""" + code_manager = CodeManager([pgsql_connector], config_with_direct_sql, logger) + safe_code = ( + """orders_count = execute_sql_query('SELECT COUNT(*) FROM orders')[0][0]""" + ) + assert code_manager._clean_code(safe_code, exec_context) == safe_code def test_clean_code_using_incorrect_sql_table( - self, pgsql_connector: PostgreSQLConnector, exec_context: MagicMock + self, + pgsql_connector: PostgreSQLConnector, + exec_context: MagicMock, + config_with_direct_sql: Config, + logger, ): """Test that the direct SQL function definition is removed when 'direct_sql' is False""" - df = SmartDataframe( - pgsql_connector, - config={"llm": FakeLLM(output=""), "direct_sql": True}, - ) - code_manager = df.lake._code_manager + code_manager = CodeManager([pgsql_connector], config_with_direct_sql, logger) safe_code = """sql_query = 'SELECT * FROM unknown_table' """ with pytest.raises(MaliciousQueryError) as excinfo: @@ -668,14 +602,14 @@ def 
test_clean_code_using_incorrect_sql_table( ) def test_clean_code_using_multi_incorrect_sql_table( - self, pgsql_connector: PostgreSQLConnector, exec_context: MagicMock + self, + pgsql_connector: PostgreSQLConnector, + exec_context: MagicMock, + config_with_direct_sql: Config, + logger: Logger, ): """Test that the direct SQL function definition is removed when 'direct_sql' is False""" - df = SmartDataframe( - pgsql_connector, - config={"llm": FakeLLM(output=""), "direct_sql": True}, - ) - code_manager = df.lake._code_manager + code_manager = CodeManager([pgsql_connector], config_with_direct_sql, logger) safe_code = """sql_query = 'SELECT * FROM table1 INNER JOIN table2 ON table1.id = table2.id'""" with pytest.raises(MaliciousQueryError) as excinfo: code_manager._clean_code(safe_code, exec_context) diff --git a/tests/unit_tests/test_file_importer.py b/tests/unit_tests/test_file_importer.py new file mode 100644 index 000000000..c5a59ba89 --- /dev/null +++ b/tests/unit_tests/test_file_importer.py @@ -0,0 +1,49 @@ +""" +Unit tests for the FileImporter class +""" + +import pandas as pd +import pytest + +from pandasai.helpers.file_importer import FileImporter + + +class TestFileImporter: + """ + Unit tests for the FileImporter class + """ + + @pytest.fixture + def mocked_df(self): + return pd.DataFrame({"column1": [1, 2, 3], "column2": [4, 5, 6]}) + + def test_import_csv_file(self, mocked_df, mocker): + mocker.patch.object( + pd, + "read_csv", + return_value=mocked_df, + ) + df = FileImporter.import_from_file("sample.csv") + assert isinstance(df, pd.DataFrame) + assert df.equals(mocked_df) + + def test_import_parquet_file(self, mocked_df, mocker): + mocker.patch.object(pd, "read_parquet", return_value=mocked_df) + df = FileImporter.import_from_file("sample.parquet") + assert isinstance(df, pd.DataFrame) + assert df.equals(mocked_df) + + def test_import_excel_file(self, mocked_df, mocker): + mocker.patch.object( + pd, + "read_excel", + return_value=mocked_df, + ) + df = 
FileImporter.import_from_file("sample.xlsx") + assert isinstance(df, pd.DataFrame) + assert df.equals(mocked_df) + + @pytest.mark.parametrize("file_path", ["sample.txt", "sample.docx", "sample.pdf"]) + def test_invalid_file_format(self, file_path): + with pytest.raises(ValueError): + FileImporter.import_from_file(file_path) diff --git a/tests/unit_tests/test_query_tracker.py b/tests/unit_tests/test_query_tracker.py index 9a0fe049b..224a477ab 100644 --- a/tests/unit_tests/test_query_tracker.py +++ b/tests/unit_tests/test_query_tracker.py @@ -9,9 +9,12 @@ import pandas as pd import pytest +from pandasai.connectors import PandasConnector from pandasai.helpers.query_exec_tracker import QueryExecTracker from pandasai.llm.fake import FakeLLM -from pandasai.smart_dataframe import SmartDataframe +from pandasai.pipelines.chat.chat_pipeline_input import ( + ChatPipelineInput, +) assert_almost_equal = TestCase().assertAlmostEqual @@ -65,31 +68,30 @@ def sample_df(self): ) @pytest.fixture - def smart_dataframe(self, llm, sample_df): - return SmartDataframe(sample_df, config={"llm": llm, "enable_cache": False}) + def dataframe(self, sample_df): + return PandasConnector({"original_df": sample_df}) @pytest.fixture - def smart_datalake(self, smart_dataframe: SmartDataframe): - return smart_dataframe.lake + def tracker(self, tracking_info): + tracker = QueryExecTracker() + tracker.start_new_track(tracking_info) + return tracker @pytest.fixture - def tracker(self): - tracker = QueryExecTracker() - tracker.start_new_track() - tracker.add_query_info( + def tracking_info(self): + return ChatPipelineInput( + "which country has the highest GDP?", + "string", conversation_id="123", - instance="SmartDatalake", - query="which country has the highest GDP?", - output_type="json", + prompt_id="1234", ) - return tracker def test_add_dataframes( - self, smart_dataframe: SmartDataframe, tracker: QueryExecTracker + self, dataframe: PandasConnector, tracker: QueryExecTracker ): # Add the dataframe 
to the tracker tracker._dataframes = [] - tracker.add_dataframes([smart_dataframe]) + tracker.add_dataframes([dataframe]) # Check if the dataframe was added correctly assert len(tracker._dataframes) == 1 @@ -159,21 +161,14 @@ def test_format_response_other_type(self, tracker: QueryExecTracker): assert formatted_response["type"] == "other_type" assert formatted_response["value"] == "SomeValue" - def test_get_summary(self): + def test_get_summary(self, tracking_info): # Execute a mock function to generate some steps and response def mock_function(*args, **kwargs): return "Mock Result" tracker = QueryExecTracker() - tracker.start_new_track() - - tracker.add_query_info( - conversation_id="123", - instance="SmartDatalake", - query="which country has the highest GDP?", - output_type="json", - ) + tracker.start_new_track(tracking_info) # Get the summary summary = tracker.get_summary() @@ -186,34 +181,7 @@ def mock_function(*args, **kwargs): assert "steps" in summary assert "response" in summary assert "execution_time" in summary - assert "is_related_query" in summary["query_info"] - - def test_related_query_in_summary(self): - # Execute a mock function to generate some steps and response - def mock_function(*args, **kwargs): - return "Mock Result" - - tracker = QueryExecTracker() - - tracker.set_related_query(False) - - tracker.start_new_track() - - tracker.add_query_info( - conversation_id="123", - instance="SmartDatalake", - query="which country has the highest GDP?", - output_type="json", - ) - - # Get the summary - summary = tracker.get_summary() - - tracker.execute_func(mock_function, tag="custom_tag") - - # Check if the summary contains the expected keys - assert "is_related_query" in summary["query_info"] - assert not summary["query_info"]["is_related_query"] + assert "skills" in summary def test_get_execution_time(self, tracker: QueryExecTracker): def mock_function(*args, **kwargs): @@ -240,10 +208,10 @@ def test_execute_func_success(self, tracker: 
QueryExecTracker): mock_func = Mock() mock_func.return_value = mock_return_value - mock_func.__name__ = "_get_prompt" + mock_func.__name__ = "get_prompt" # Execute the mock function using execute_func - result = tracker.execute_func(mock_func, tag="_get_prompt") + result = tracker.execute_func(mock_func, tag="get_prompt") # Check if the result is as expected assert result.to_string() == "Mock Result" @@ -306,7 +274,7 @@ def test_execute_func_re_rerun_code(self, tracker: QueryExecTracker): # Create a mock function mock_func = Mock() mock_func.return_value = "code" - mock_func.__name__ = "_retry_run_code" + mock_func.__name__ = "retry_run_code" # Execute the mock function using execute_func result = tracker.execute_func(mock_func) @@ -405,8 +373,8 @@ def test_publish_method_with_no_config(self, tracker: QueryExecTracker): def mock_get_summary(): return "Test summary data" - os.environ["LOGGING_SERVER_URL"] = "" - os.environ["LOGGING_SERVER_API_KEY"] = "" + os.environ["PANDASAI_API_URL"] = "" + os.environ["PANDASAI_API_KEY"] = "" tracker._server_config = None @@ -434,8 +402,8 @@ def mock_get_summary(): return "Test summary data" # Define a mock environment for testing - os.environ["LOGGING_SERVER_URL"] = "http://test-server" - os.environ["LOGGING_SERVER_API_KEY"] = "test-api-key" + os.environ["PANDASAI_API_URL"] = "http://test-server" + os.environ["PANDASAI_API_KEY"] = "test-api-key" # Set the get_summary method to your mock tracker.get_summary = mock_get_summary @@ -459,7 +427,9 @@ def mock_get_summary(): # Check the result assert result is None # The function should return None - def test_multiple_instance_of_tracker(self, tracker: QueryExecTracker): + def test_multiple_instance_of_tracker( + self, tracker: QueryExecTracker, tracking_info: ChatPipelineInput + ): # Create a mock function mock_func = Mock() mock_func.return_value = "code" @@ -469,13 +439,13 @@ def test_multiple_instance_of_tracker(self, tracker: QueryExecTracker): tracker.execute_func(mock_func, 
tag="generate_code") tracker2 = QueryExecTracker() - tracker2.start_new_track() - tracker2.add_query_info( - conversation_id="12345", - instance="SmartDatalake", - query="which country has the highest GDP?", - output_type="json", + track_input = ChatPipelineInput( + "which country has the highest GDP?", + "string", + conversation_id="1234", + prompt_id="1234", ) + tracker2.start_new_track(track_input) assert len(tracker._steps) == 1 assert len(tracker2._steps) == 0 @@ -488,8 +458,8 @@ def test_multiple_instance_of_tracker(self, tracker: QueryExecTracker): # Create a mock function mock_func2 = Mock() mock_func2.return_value = "code" - mock_func2.__name__ = "_retry_run_code" - tracker2.execute_func(mock_func2, tag="_retry_run_code") + mock_func2.__name__ = "retry_run_code" + tracker2.execute_func(mock_func2, tag="retry_run_code") assert len(tracker._steps) == 1 assert len(tracker2._steps) == 2 @@ -498,7 +468,9 @@ def test_multiple_instance_of_tracker(self, tracker: QueryExecTracker): != tracker2._query_info["conversation_id"] ) - def test_conversation_id_in_different_tracks(self, tracker: QueryExecTracker): + def test_conversation_id_in_different_tracks( + self, tracker: QueryExecTracker, tracking_info: ChatPipelineInput + ): # Create a mock function mock_func = Mock() mock_func.return_value = "code" @@ -509,21 +481,14 @@ def test_conversation_id_in_different_tracks(self, tracker: QueryExecTracker): summary = tracker.get_summary() - tracker.start_new_track() - - tracker.add_query_info( - conversation_id="123", - instance="SmartDatalake", - query="Plot the GDP's?", - output_type="json", - ) + tracker.start_new_track(tracking_info) # Create a mock function mock_func2 = Mock() mock_func2.return_value = "code" - mock_func2.__name__ = "_retry_run_code" + mock_func2.__name__ = "retry_run_code" - tracker.execute_func(mock_func2, tag="_retry_run_code") + tracker.execute_func(mock_func2, tag="retry_run_code") summary2 = tracker.get_summary() diff --git 
a/tests/unit_tests/test_smartdataframe.py b/tests/unit_tests/test_smartdataframe.py deleted file mode 100644 index c27f7bf4c..000000000 --- a/tests/unit_tests/test_smartdataframe.py +++ /dev/null @@ -1,1072 +0,0 @@ -"""Unit tests for the SmartDatalake class""" -import json -import logging -import os -import sys -from collections import defaultdict -from typing import Optional -from unittest.mock import Mock, patch -from uuid import UUID - -import pandas as pd -import polars as pl -import pytest - -import pandasai -from pandasai import SmartDataframe -from pandasai.exceptions import LLMNotFoundError -from pandasai.helpers.cache import Cache -from pandasai.helpers.df_info import DataFrameType -from pandasai.helpers.output_types import ( - DefaultOutputType, - output_type_factory, - output_types_map, -) -from pandasai.helpers.viz_library_types import ( - NoVizLibraryType, - viz_lib_map, - viz_lib_type_factory, -) -from pandasai.llm.fake import FakeLLM -from pandasai.prompts import AbstractPrompt, GeneratePythonCodePrompt -from pandasai.pydantic import BaseModel, Field - - -class TestSmartDataframe: - """Unit tests for the SmartDatalake class""" - - def tearDown(self): - for filename in [ - "df_test.parquet", - "df_test_polars.parquet", - "df_duplicate.parquet", - ]: - if os.path.exists("cache/" + filename): - os.remove("cache/" + filename) - - # Remove saved_dfs from pandasai.json - with open("pandasai.json", "r") as json_file: - data = json.load(json_file) - data["saved_dfs"] = [] - with open("pandasai.json", "w") as json_file: - json.dump(data, json_file, indent=2) - - @pytest.fixture - def llm(self, output: Optional[str] = None): - return FakeLLM(output=output) - - @pytest.fixture - def data_sampler(self): - class DataSampler: - df = None - - def __init__(self, df: DataFrameType): - self.df = df - - def sample(self, _n: int = 5): - return self.df - - return DataSampler - - @pytest.fixture - def sample_df(self): - return pd.DataFrame( - { - "country": [ - "United 
States", - "United Kingdom", - "France", - "Germany", - "Italy", - "Spain", - "Canada", - "Australia", - "Japan", - "China", - ], - "gdp": [ - 19294482071552, - 2891615567872, - 2411255037952, - 3435817336832, - 1745433788416, - 1181205135360, - 1607402389504, - 1490967855104, - 4380756541440, - 14631844184064, - ], - "happiness_index": [ - 6.94, - 7.16, - 6.66, - 7.07, - 6.38, - 6.4, - 7.23, - 7.22, - 5.87, - 5.12, - ], - } - ) - - @pytest.fixture - def sample_saved_dfs(self): - return [ - { - "name": "photo", - "description": "Dataframe containing photo metadata", - "sample": "filename,format,size\n1.jpg,JPEG,1240KB\n2.png,PNG,320KB", - "import_path": "path/to/photo_data.parquet", - } - ] - - @pytest.fixture - def custom_head(self, sample_df: pd.DataFrame): - return pd.DataFrame({"A": [1, 2, 3, 4], "B": [5, 6, 7, 8]}) - - @pytest.fixture - def smart_dataframe(self, llm, sample_df, custom_head): - return SmartDataframe( - sample_df, - config={"llm": llm, "enable_cache": False}, - custom_head=custom_head, - ) - - @pytest.fixture - def llm_result_mocks(self, custom_head): - result_template = "result = {{ 'type': '{type}', 'value': {value} }}" - - return { - "number": result_template.format(type="number", value=1), - "string": result_template.format(type="string", value="'Test'"), - "plot": result_template.format(type="plot", value="'temp_plot.png'"), - "dataframe": result_template.format(type="dataframe", value=custom_head), - } - - @pytest.fixture - def smart_dataframe_mocked_df(self, llm, sample_df, custom_head): - smart_df = SmartDataframe( - sample_df, - config={"llm": llm, "enable_cache": False}, - custom_head=custom_head, - ) - smart_df._core._df = Mock() - return smart_df - - def test_init(self, smart_dataframe): - assert smart_dataframe._table_name is None - assert smart_dataframe._table_description is None - assert smart_dataframe.engine is not None - assert smart_dataframe.dataframe is not None - - def test_init_without_llm(self, sample_df): - with 
pytest.raises(LLMNotFoundError): - SmartDataframe(sample_df, config={"llm": "-"}) - - def test_run(self, smart_dataframe: SmartDataframe, llm): - llm._output = "result = { 'type': 'number', 'value': 1 }" - assert smart_dataframe.chat("What number comes before 2?") == 1 - - def test_run_with_non_conversational_answer( - self, smart_dataframe: SmartDataframe, llm - ): - llm._output = "result = { 'type': 'number', 'value': 1 + 1 }" - assert smart_dataframe.chat("What is the sum of 1 + 1?") == 2 - - def test_run_code(self, smart_dataframe: SmartDataframe, llm): - llm._output = """ -df = dfs[0] -df['b'] = df['a'] + 1 -result = { 'type': 'dataframe', 'value': df } -""" - smart_dataframe = SmartDataframe( - pd.DataFrame({"a": [1, 2, 3]}), config={"llm": llm, "enable_cache": False} - ) - - output_df = smart_dataframe.chat("Set column b to column a + 1") - assert output_df["a"].tolist() == [1, 2, 3] - assert output_df["b"].tolist() == [2, 3, 4] - - def test_run_with_privacy_enforcement(self, llm): - df = pd.DataFrame({"country": []}) - df = SmartDataframe(df, config={"llm": llm, "enable_cache": False}) - df.enforce_privacy = True - - expected_prompt = """ -dfs[0]:0x1 -country - - - - - -Update this initial code: -```python -# TODO: import the required dependencies -import pandas as pd - -# Write code here - -# Declare result var: type (possible values "string", "number", "dataframe", "plot"). Examples: { "type": "string", "value": f"The highest salary is {highest_salary}." } or { "type": "number", "value": 125 } or { "type": "dataframe", "value": pd.DataFrame({...}) } or { "type": "plot", "value": "temp_chart.png" } -``` - -Q: How many countries are in the dataframe? -Variable `dfs: list[pd.DataFrame]` is already declared. - -At the end, declare "result" variable as a dictionary of type and value. 
- - - -Generate python code and return full updated code:""" # noqa: E501 - df.chat("How many countries are in the dataframe?") - last_prompt = df.last_prompt - if sys.platform.startswith("win"): - last_prompt = df.last_prompt.replace("\r\n", "\n") - - assert last_prompt == expected_prompt - - @pytest.mark.parametrize( - "output_type,output_type_hint", - [ - (None, DefaultOutputType().template_hint), - *[ - (type_, output_type_factory(type_).template_hint) - for type_ in output_types_map - ], - ], - ) - @patch("pandasai.responses.response_parser.ResponseParser.parse", autospec=True) - @patch("pandasai.helpers.query_exec_tracker.QueryExecTracker._format_response") - def test_run_passing_output_type( - self, - _format_response_mock, - parser_mock, - llm, - llm_result_mocks, - output_type, - output_type_hint, - ): - df = pd.DataFrame({"country": []}) - df = SmartDataframe(df, config={"llm": llm, "enable_cache": False}) - - expected_prompt = f""" -dfs[0]:0x1 -country - - - - - -Update this initial code: -```python -# TODO: import the required dependencies -import pandas as pd - -# Write code here - -# Declare result var: {output_type_hint} -``` - -Q: How many countries are in the dataframe? -Variable `dfs: list[pd.DataFrame]` is already declared. - -At the end, declare "result" variable as a dictionary of type and value. 
- - - -Generate python code and return full updated code:""" - parser_mock.return_value = Mock() - _format_response_mock.return_value = Mock() - type_ = output_type if output_type is not None else "string" - llm._output = llm_result_mocks[type_] - - df.chat("How many countries are in the dataframe?", output_type=output_type) - last_prompt = df.last_prompt - if sys.platform.startswith("win"): - last_prompt = df.last_prompt.replace("\r\n", "\n") - assert last_prompt == expected_prompt - - @pytest.mark.parametrize( - "output_type_to_pass,output_type_returned", - [ - ("number", "string"), - ("string", "number"), - ], - ) - def test_run_incorrect_output_type_returned( - self, - smart_dataframe: SmartDataframe, - llm, - sample_df, - output_type_to_pass, - output_type_returned, - ): - llm._output = f"""highest_gdp = dfs[0]['gdp'].max() -result = {{ 'type': '{output_type_returned}', 'value': highest_gdp }} -""" - smart_dataframe = SmartDataframe( - sample_df, config={"llm": llm, "enable_cache": False} - ) - - smart_dataframe.chat( - "What is the highest GDP?", output_type=output_type_to_pass - ) - expected_log = ( - f"The result dict contains inappropriate 'type'. 
" - f"Expected '{output_type_to_pass}', actual " - f"'{output_type_returned}'" - ) - assert any((expected_log in log.get("msg") for log in smart_dataframe.logs)) - - def test_to_dict(self, smart_dataframe: SmartDataframe): - expected_keys = ("country", "gdp", "happiness_index") - - result_dict = smart_dataframe.to_dict() - - assert isinstance(result_dict, dict) - assert all(key in result_dict for key in expected_keys) - - @pytest.mark.parametrize( - "to_dict_params,expected_passing_params,engine_type", - [ - ({}, {"orient": "dict", "into": dict}, "pandas"), - ({}, {"orient": "dict", "into": dict}, "modin"), - ({}, {"as_series": True}, "polars"), - ({"orient": "dict"}, {"orient": "dict", "into": dict}, "pandas"), - ( - {"orient": "dict", "into": defaultdict}, - {"orient": "dict", "into": defaultdict}, - "pandas", - ), - ({"orient": "dict"}, {"orient": "dict", "into": dict}, "modin"), - ( - {"orient": "dict", "into": defaultdict}, - {"orient": "dict", "into": defaultdict}, - "modin", - ), - ({"as_series": False}, {"as_series": False}, "polars"), - ( - {"as_series": False, "orient": "dict", "into": defaultdict}, - {"as_series": False}, - "polars", - ), - ], - ) - def test_to_dict_passing_parameters( - self, - smart_dataframe_mocked_df: SmartDataframe, - to_dict_params, - engine_type, - expected_passing_params, - ): - smart_dataframe_mocked_df._engine = engine_type - smart_dataframe_mocked_df.to_dict(**to_dict_params) - # noinspection PyUnresolvedReferences - smart_dataframe_mocked_df.dataframe.to_dict.assert_called_once_with( - **expected_passing_params - ) - - def test_extract_code(self, llm): - code = """```python -result = {'happiness': 0.5, 'gdp': 0.8} -print(result)```""" - assert ( - llm._extract_code(code) - == "result = {'happiness': 0.5, 'gdp': 0.8}\nprint(result)" - ) - - code = """``` -result = {'happiness': 1, 'gdp': 0.43}```""" - assert llm._extract_code(code) == "result = {'happiness': 1, 'gdp': 0.43}" - - def test_last_prompt_id(self, smart_dataframe: 
SmartDataframe): - smart_dataframe.chat("How many countries are in the dataframe?") - prompt_id = smart_dataframe.last_prompt_id - assert isinstance(prompt_id, UUID) - - def test_last_prompt_id_no_prompt(self, smart_dataframe: SmartDataframe): - with pytest.raises(AttributeError): - smart_dataframe.last_prompt_id - - def test_getters_are_accessible(self, smart_dataframe: SmartDataframe, llm): - llm._output = "result = {'type': 'number', 'value': 1}" - smart_dataframe.chat("What number comes before 2?") - assert ( - smart_dataframe.last_code_generated - == "result = {'type': 'number', 'value': 1}" - ) - - def test_save_chart_non_default_dir( - self, smart_dataframe: SmartDataframe, llm, sample_df - ): - """ - Test chat with `SmartDataframe` with custom `save_charts_path`. - - Script: - 1) Ask `SmartDataframe` to build a chart and save it in - a custom directory; - 2) Check if substring representing the directory present in - `llm.last_prompt`. - 3) Check if the code has had a call of `plt.savefig()` passing - the custom directory. - - Notes: - 1) Mock `import_dependency()` util-function to avoid the - actual calls to `matplotlib.pyplot`. - 2) The `analyze_data()` function in the code fixture must have - `"type": None` in the result dict. Otherwise, if it had - `"type": "plot"` (like it has in practice), `_format_results()` - method from `SmartDatalake` object would try to read the image - with `matplotlib.image.imread()` and this test would fail. - Those calls to `matplotlib.image` are unmockable because of - imports inside the function scope, not in the top of a module. 
- @TODO: figure out if we can just move the imports beyond to - make it possible to mock out `matplotlib.image` - """ - llm._output = """ -import pandas as pd -import matplotlib.pyplot as plt - -df = dfs[0].nlargest(5, 'happiness_index') - -plt.figure(figsize=(8, 6)) -plt.pie(df['happiness_index'], labels=df['country'], autopct='%1.1f%%') -plt.title('Happiness Index for the 5 Happiest Countries') -plt.savefig('temp_chart.png') -plt.close() - -result = {"type": None, "value": "temp_chart.png"} -""" - with patch( - "pandasai.helpers.code_manager.import_dependency" - ) as import_dependency_mock: - smart_dataframe = SmartDataframe( - sample_df, - config={ - "llm": llm, - "enable_cache": False, - "save_charts": True, - "save_charts_path": "charts", - }, - ) - - smart_dataframe.chat("Plot pie-chart the 5 happiest countries") - - plt_mock = getattr(import_dependency_mock.return_value, "matplotlib.pyplot") - assert plt_mock.savefig.called - assert ( - plt_mock.savefig.call_args.args[0] - == f"charts/{smart_dataframe.last_prompt_id}.png" - ) - - def test_shortcut(self, smart_dataframe: SmartDataframe): - smart_dataframe.chat = Mock(return_value="Hello world") - smart_dataframe.clean_data() - smart_dataframe.chat.assert_called_once() - - def test_replace_generate_code_prompt(self, llm): - class CustomPrompt(AbstractPrompt): - template: str = """{test} || {dfs[0].shape[1]} || {conversation}""" - - def __init__(self, **kwargs): - super().__init__(**kwargs) - - replacement_prompt = CustomPrompt(test="test value") - df = SmartDataframe( - pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}), - config={ - "llm": llm, - "enable_cache": False, - "custom_prompts": {"generate_python_code": replacement_prompt}, - }, - ) - question = "Will this work?" 
- df.chat(question) - - expected_last_prompt = replacement_prompt.to_string() - assert llm.last_prompt == expected_last_prompt - - def test_replace_correct_error_prompt(self, llm): - class ReplacementPrompt(AbstractPrompt): - @property - def template(self): - return "Custom prompt" - - replacement_prompt = ReplacementPrompt() - df = SmartDataframe( - pd.DataFrame(), - config={ - "llm": llm, - "custom_prompts": {"correct_error": replacement_prompt}, - "enable_cache": False, - }, - ) - - df.lake._retry_run_code("wrong code", Exception()) - expected_last_prompt = replacement_prompt.to_string() - assert llm.last_prompt == expected_last_prompt - - def test_saves_logs(self, smart_dataframe: SmartDataframe): - with patch.object(smart_dataframe.lake.logger, "_calculate_time_diff"): - smart_dataframe.lake.logger._calculate_time_diff.return_value = 0 - - assert smart_dataframe.logs == [] - - debug_msg = "Some debug log" - info_msg = "Some info log" - warning_msg = "Some warning log" - error_msg = "Some error log" - critical_msg = "Some critical log" - - smart_dataframe.lake.logger.log(debug_msg, level=logging.DEBUG) - smart_dataframe.lake.logger.log(info_msg) # INFO should be default - smart_dataframe.lake.logger.log(warning_msg, level=logging.WARNING) - smart_dataframe.lake.logger.log(error_msg, level=logging.ERROR) - smart_dataframe.lake.logger.log(critical_msg, level=logging.CRITICAL) - logs = smart_dataframe.logs - - assert len(logs) == 5 - - assert all( - ("msg" in log and "level" in log and "time" in log and "source" in log) - for log in logs - ) - assert { - "msg": debug_msg, - "level": "DEBUG", - "time": 0, - "source": "TestSmartDataframe", - } in logs - assert { - "msg": info_msg, - "level": "INFO", - "time": 0, - "source": "TestSmartDataframe", - } in logs - assert { - "msg": warning_msg, - "level": "WARNING", - "time": 0, - "source": "TestSmartDataframe", - } in logs - assert { - "msg": error_msg, - "level": "ERROR", - "time": 0, - "source": "TestSmartDataframe", 
- } in logs - assert { - "msg": critical_msg, - "level": "CRITICAL", - "time": 0, - "source": "TestSmartDataframe", - } in logs - - def test_updates_verbose_config_with_setters(self, smart_dataframe: SmartDataframe): - assert smart_dataframe.verbose is False - - smart_dataframe.verbose = True - assert smart_dataframe.verbose - assert smart_dataframe.lake._logger.verbose - assert len(smart_dataframe.lake._logger._logger.handlers) == 1 - assert isinstance( - smart_dataframe.lake._logger._logger.handlers[0], logging.StreamHandler - ) - - smart_dataframe.verbose = False - assert not smart_dataframe.verbose - assert smart_dataframe.lake._logger.verbose is False - assert len(smart_dataframe.lake._logger._logger.handlers) == 0 - - def test_updates_save_logs_config_with_setters( - self, smart_dataframe: SmartDataframe - ): - assert smart_dataframe.save_logs - - smart_dataframe.save_logs = False - assert not smart_dataframe.save_logs - assert not smart_dataframe.lake._logger.save_logs - assert len(smart_dataframe.lake._logger._logger.handlers) == 0 - - smart_dataframe.save_logs = True - assert smart_dataframe.save_logs - assert smart_dataframe.lake._logger.save_logs - assert len(smart_dataframe.lake._logger._logger.handlers) == 1 - assert isinstance( - smart_dataframe.lake._logger._logger.handlers[0], logging.FileHandler - ) - - def test_updates_enable_cache_config_with_setters( - self, smart_dataframe: SmartDataframe - ): - assert smart_dataframe.enable_cache is False - - smart_dataframe.enable_cache = True - assert smart_dataframe.enable_cache - assert smart_dataframe.lake.enable_cache - assert smart_dataframe.lake.cache is not None - assert isinstance(smart_dataframe.lake._cache, Cache) - - smart_dataframe.enable_cache = False - assert not smart_dataframe.enable_cache - assert smart_dataframe.lake.enable_cache is False - assert smart_dataframe.lake.cache is None - - def test_updates_configs_with_setters(self, smart_dataframe: SmartDataframe): - assert 
smart_dataframe.enforce_privacy is False - assert smart_dataframe.use_error_correction_framework - assert smart_dataframe.custom_prompts == {} - assert smart_dataframe.save_charts is False - assert smart_dataframe.save_charts_path == "exports/charts" - assert smart_dataframe.custom_whitelisted_dependencies == [] - assert smart_dataframe.max_retries == 3 - - smart_dataframe.enforce_privacy = True - assert smart_dataframe.enforce_privacy - - smart_dataframe.use_error_correction_framework = False - assert not smart_dataframe.use_error_correction_framework - - smart_dataframe.custom_prompts = { - "generate_python_code": GeneratePythonCodePrompt() - } - assert smart_dataframe.custom_prompts != {} - - smart_dataframe.save_charts = True - assert smart_dataframe.save_charts - - smart_dataframe.save_charts_path = "some/path" - assert smart_dataframe.save_charts_path == "some/path" - - smart_dataframe.custom_whitelisted_dependencies = ["some_dependency"] - assert smart_dataframe.custom_whitelisted_dependencies == ["some_dependency"] - - smart_dataframe.max_retries = 5 - assert smart_dataframe.max_retries == 5 - - def test_custom_head_getter(self, custom_head, smart_dataframe: SmartDataframe): - assert smart_dataframe.custom_head.equals(custom_head) - - def test_custom_head_setter(self, custom_head, smart_dataframe: SmartDataframe): - new_custom_head = ( - custom_head.copy().sample(frac=1, axis=1).reset_index(drop=True) - ) - smart_dataframe.custom_head = new_custom_head - assert new_custom_head.equals(smart_dataframe.custom_head) - - def test_load_dataframe_from_list(self, smart_dataframe): - input_data = [ - {"column1": 1, "column2": 4}, - {"column1": 2, "column2": 5}, - {"column1": 3, "column2": 6}, - ] - - smart_dataframe._load_dataframe(input_data) - - assert isinstance(smart_dataframe.dataframe, pd.DataFrame) - - def test_load_dataframe_from_dict(self, smart_dataframe): - input_data = {"column1": [1, 2, 3], "column2": [4, 5, 6]} - - 
smart_dataframe._load_dataframe(input_data) - - assert isinstance(smart_dataframe.dataframe, pd.DataFrame) - - def test_load_dataframe_from_pandas_dataframe(self, smart_dataframe): - pandas_df = pd.DataFrame({"column1": [1, 2, 3], "column2": [4, 5, 6]}) - - smart_dataframe._load_dataframe(pandas_df) - - assert isinstance(smart_dataframe.dataframe, pd.DataFrame) - - def test_load_dataframe_from_saved_dfs(self, sample_saved_dfs, mocker): - expected_df = pd.DataFrame( - { - "filename": ["photo1.jpg", "photo2.jpg"], - "format": ["JPEG", "PNG"], - "size": ["1240KB", "320KB"], - } - ) - mocker.patch.object(pandasai.pandas, "read_parquet", return_value=expected_df) - - mocker.patch.object( - json, - "load", - return_value={"saved_dfs": sample_saved_dfs}, - ) - - saved_df_name = "photo" - smart_dataframe = SmartDataframe(saved_df_name) - - assert isinstance(smart_dataframe.dataframe, pd.DataFrame) - assert smart_dataframe.table_name == saved_df_name - assert smart_dataframe.dataframe.equals(expected_df) - - def test_load_dataframe_from_polars(self, smart_dataframe): - polars_df = pl.DataFrame({"column1": [1, 2, 3], "column2": [4, 5, 6]}) - - smart_dataframe._load_dataframe(polars_df) - - assert isinstance(smart_dataframe.dataframe, pl.DataFrame) - assert smart_dataframe.dataframe.frame_equal(polars_df) - - def test_import_csv_file(self, smart_dataframe, mocker): - mocker.patch.object( - pandasai.pandas, - "read_parquet", - return_value=pd.DataFrame({"column1": [1, 2, 3], "column2": [4, 5, 6]}), - ) - - file_path = "sample.parquet" - - df = smart_dataframe._import_from_file(file_path) - - assert isinstance(df, pd.DataFrame) - - def test_import_parquet_file(self, smart_dataframe, mocker): - mocker.patch.object( - pandasai.pandas, - "read_parquet", - return_value=pd.DataFrame({"column1": [1, 2, 3], "column2": [4, 5, 6]}), - ) - - file_path = "sample.parquet" - - df = smart_dataframe._import_from_file(file_path) - - assert isinstance(df, pd.DataFrame) - - def 
test_import_excel_file(self, smart_dataframe, mocker): - mocker.patch.object( - pandasai.pandas, - "read_excel", - return_value=pd.DataFrame({"column1": [1, 2, 3], "column2": [4, 5, 6]}), - ) - - file_path = "sample.xlsx" - - df = smart_dataframe._import_from_file(file_path) - - assert isinstance(df, pd.DataFrame) - - expected_df = pd.DataFrame({"column1": [1, 2, 3], "column2": [4, 5, 6]}) - assert df.equals(expected_df) - - @pytest.mark.parametrize("file_path", ["sample.txt", "sample.docx", "sample.pdf"]) - def test_invalid_file_format(self, smart_dataframe, file_path): - with pytest.raises(ValueError): - smart_dataframe._import_from_file(file_path) - - def test_import_pandas_series(self, llm): - pandas_series = pd.Series([1, 2, 3]) - - smart_dataframe = SmartDataframe(pandas_series, config={"llm": llm}) - - assert isinstance(smart_dataframe.dataframe, pd.DataFrame) - assert smart_dataframe.dataframe.equals(pd.DataFrame({0: [1, 2, 3]})) - - def test_save_pandas_dataframe(self, llm): - with open("pandasai.json", "r") as json_file: - backup_pandasai = json_file.read() - - # Create an instance of SmartDataframe - pandas_df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) - df_object = SmartDataframe( - pandas_df, - name="df_test", - description="Test description", - config={"llm": llm, "enable_cache": False}, - ) - - # Call the save function - df_object.save() - - # Verify that the data was saved correctly - with open("pandasai.json", "r") as json_file: - data = json.load(json_file) - assert data["saved_dfs"][0]["name"] == "df_test" - - with open("pandasai.json", "w") as json_file: - json_file.write(backup_pandasai) - - def test_save_pandas_dataframe_with_name(self, llm): - with open("pandasai.json", "r") as json_file: - backup_pandasai = json_file.read() - - # Create an instance of SmartDataframe - pandas_df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) - df_object = SmartDataframe( - pandas_df, - name="df_test", - description="Test description", - config={"llm": 
llm, "enable_cache": False}, - ) - - # Call the save function - df_object.save("custom_name") - - # Verify that the data was saved correctly - with open("pandasai.json", "r") as json_file: - data = json.load(json_file) - assert data["saved_dfs"][0]["name"] == "custom_name" - - with open("pandasai.json", "w") as json_file: - json_file.write(backup_pandasai) - - def test_save_polars_dataframe(self, llm): - with open("pandasai.json", "r") as json_file: - backup_pandasai = json_file.read() - - # Create an instance of SmartDataframe - polars_df = pl.DataFrame({"column1": [1, 2, 3], "column2": [4, 5, 6]}) - df_object = SmartDataframe( - polars_df, - name="df_test_polars", - description="Test description", - config={"llm": llm, "enable_cache": False}, - ) - - # Call the save function - df_object.save() - - # Verify that the data was saved correctly - with open("pandasai.json", "r") as json_file: - data = json.load(json_file) - assert data["saved_dfs"][0]["name"] == "df_test_polars" - - # recover file for next test case - with open("pandasai.json", "w") as json_file: - json_file.write(backup_pandasai) - - def test_save_pandas_dataframe_duplicate_name(self, llm): - with open("pandasai.json", "r") as json_file: - backup_pandasai = json_file.read() - - # Create a sample DataFrame - df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) - - # Create instances of SmartDataframe - df_object1 = SmartDataframe( - df, - name="df_duplicate", - description="Description 1", - config={"llm": llm, "enable_cache": False}, - ) - df_object2 = SmartDataframe( - df, - name="df_duplicate", - description="Description 2", - config={"llm": llm, "enable_cache": False}, - ) - - # Call the save function for the first instance - df_object1.save() - - # Attempt to save the second instance and check for ValueError - with pytest.raises(ValueError, match="Duplicate dataframe found: df_duplicate"): - df_object2.save() - - # Recover file for next test case - with open("pandasai.json", "w") as json_file: - 
json_file.write(backup_pandasai) - - def test_save_pandas_no_name(self, llm): - with open("pandasai.json", "r") as json_file: - backup_pandasai = json_file.read() - - # Create a sample DataFrame - df = pd.DataFrame({"A": [1, 2, 3, 4], "B": [5, 6, 7, 8]}) - - # Create an instance of SmartDataframe without a name - df_object = SmartDataframe( - df, description="No Name", config={"llm": llm, "enable_cache": False} - ) - - # Mock the hashlib.sha256() method - with patch("hashlib.sha256") as mock_sha256: - # Set the return value of the hexdigest() method - mock_sha256.return_value.hexdigest.return_value = "mocked_hash" - - # Call the save() method - df_object.save() - - # Check that hashlib.sha256() was called with the correct argument - mock_sha256.assert_called_with(df_object.head_csv.encode()) - - # Verify that the data was saved correctly - with open("pandasai.json", "r") as json_file: - data = json.load(json_file) - assert data["saved_dfs"][0]["name"] == "mocked_hash" - - # Recover file for next test case - with open("pandasai.json", "w") as json_file: - json_file.write(backup_pandasai) - - def test_pydantic_validate(self, llm): - # Create a sample DataFrame - df = pd.DataFrame({"A": [1, 2, 3, 4], "B": [5, 6, 7, 8]}) - - # Create an instance of SmartDataframe without a name - df_object = SmartDataframe( - df, description="Name", config={"llm": llm, "enable_cache": False} - ) - - # Pydantic Schema - class TestSchema(BaseModel): - A: int - B: int - - validation_result = df_object.validate(TestSchema) - - assert validation_result.passed - - def test_pydantic_validate_false(self, llm): - # Create a sample DataFrame - df = pd.DataFrame({"A": ["Test", "Test2", "Test3", "Test4"], "B": [5, 6, 7, 8]}) - - # Create an instance of SmartDataframe without a name - df_object = SmartDataframe( - df, description="Name", config={"llm": llm, "enable_cache": False} - ) - - # Pydantic Schema - class TestSchema(BaseModel): - A: int - B: int - - validation_result = 
df_object.validate(TestSchema) - - assert validation_result.passed is False - - def test_pydantic_validate_polars(self, llm): - # Create a sample DataFrame - df = pl.DataFrame({"A": [1, 2, 3, 4], "B": [5, 6, 7, 8]}) - - # Create an instance of SmartDataframe without a name - df_object = SmartDataframe( - df, description="Name", config={"llm": llm, "enable_cache": False} - ) - - # Pydantic Schema - class TestSchema(BaseModel): - A: int - B: int - - validation_result = df_object.validate(TestSchema) - assert validation_result.passed - - def test_pydantic_validate_false_one_record(self, llm): - # Create a sample DataFrame - df = pd.DataFrame({"A": [1, "test", 3, 4], "B": [5, 6, 7, 8]}) - - # Create an instance of SmartDataframe without a name - df_object = SmartDataframe( - df, description="Name", config={"llm": llm, "enable_cache": False} - ) - - # Pydantic Schema - class TestSchema(BaseModel): - A: int - B: int - - validation_result = df_object.validate(TestSchema) - assert ( - validation_result.passed is False and len(validation_result.errors()) == 1 - ) - - def test_pydantic_validate_complex_schema(self, llm): - # Create a sample DataFrame - df = pd.DataFrame({"A": [1, 2, 3, 4], "B": [5, 6, 7, 8]}) - - # Create an instance of SmartDataframe without a name - df_object = SmartDataframe( - df, description="Name", config={"llm": llm, "enable_cache": False} - ) - - # Pydantic Schema - class TestSchema(BaseModel): - A: int = Field(..., gt=5) - B: int - - validation_result = df_object.validate(TestSchema) - - assert validation_result.passed is False - - class TestSchema(BaseModel): - A: int = Field(..., lt=5) - B: int - - validation_result = df_object.validate(TestSchema) - - assert validation_result.passed - - def test_head_csv_with_custom_head( - self, custom_head, data_sampler, smart_dataframe: SmartDataframe - ): - with patch("pandasai.smart_dataframe.DataSampler", new=data_sampler): - assert smart_dataframe.head_csv == custom_head.to_csv(index=False) - - 
@pytest.mark.parametrize( - "viz_library_type,viz_library_type_hint", - [ - (None, NoVizLibraryType().template_hint), - *[ - (type_, viz_lib_type_factory(type_).template_hint) - for type_ in viz_lib_map - ], - ], - ) - def test_run_passing_viz_library_type( - self, llm, viz_library_type, viz_library_type_hint - ): - df = pd.DataFrame({"country": []}) - df = SmartDataframe( - df, - config={ - "llm": llm, - "enable_cache": False, - "data_viz_library": viz_library_type, - }, - ) - - expected_prompt = ( - """ -dfs[0]:0x1 -country - - - - - -Update this initial code: -```python -# TODO: import the required dependencies -import pandas as pd - -# Write code here - -# Declare result var: type (possible values "string", "number", "dataframe", "plot"). Examples: { "type": "string", "value": f"The highest salary is {highest_salary}." } or { "type": "number", "value": 125 } or { "type": "dataframe", "value": pd.DataFrame({...}) } or { "type": "plot", "value": "temp_chart.png" } -``` - -Q: Plot the histogram of countries showing for each the gdp with distinct bar colors -Variable `dfs: list[pd.DataFrame]` is already declared. - -At the end, declare "result" variable as a dictionary of type and value. 
-%s - - -Generate python code and return full updated code:""" # noqa: E501 - % viz_library_type_hint - ) - - df.chat( - "Plot the histogram of countries showing for each the gdp" - " with distinct bar colors" - ) - last_prompt = df.last_prompt - if sys.platform.startswith("win"): - last_prompt = df.last_prompt.replace("\r\n", "\n") - - assert last_prompt == expected_prompt diff --git a/tests/unit_tests/test_smartdatalake.py b/tests/unit_tests/test_smartdatalake.py deleted file mode 100644 index 94b3b35c5..000000000 --- a/tests/unit_tests/test_smartdatalake.py +++ /dev/null @@ -1,279 +0,0 @@ -"""Unit tests for the SmartDatalake class""" -import os -import sys -from typing import Optional -from unittest.mock import Mock, patch - -import pandas as pd -import pytest -from langchain import OpenAI - -from pandasai import SmartDataframe, SmartDatalake -from pandasai.connectors.base import SQLConnectorConfig -from pandasai.connectors.sql import PostgreSQLConnector, SQLConnector -from pandasai.constants import DEFAULT_FILE_PERMISSIONS -from pandasai.helpers.code_manager import CodeManager -from pandasai.llm.fake import FakeLLM - - -class TestSmartDatalake: - """Unit tests for the SmartDatlake class""" - - @pytest.fixture - def llm(self, output: Optional[str] = None): - return FakeLLM(output=output) - - @pytest.fixture - def sample_df(self): - return pd.DataFrame( - { - "country": [ - "United States", - "United Kingdom", - "France", - "Germany", - "Italy", - "Spain", - "Canada", - "Australia", - "Japan", - "China", - ], - "gdp": [ - 19294482071552, - 2891615567872, - 2411255037952, - 3435817336832, - 1745433788416, - 1181205135360, - 1607402389504, - 1490967855104, - 4380756541440, - 14631844184064, - ], - "happiness_index": [ - 6.94, - 7.16, - 6.66, - 7.07, - 6.38, - 6.4, - 7.23, - 7.22, - 5.87, - 5.12, - ], - } - ) - - @pytest.fixture - @patch("pandasai.connectors.sql.create_engine", autospec=True) - def sql_connector(self, create_engine): - # Define your ConnectorConfig 
instance here - self.config = SQLConnectorConfig( - dialect="mysql", - driver="pymysql", - username="your_username", - password="your_password", - host="your_host", - port=443, - database="your_database", - table="your_table", - where=[["column_name", "=", "value"]], - ).dict() - - # Create an instance of SQLConnector - return SQLConnector(self.config) - - @pytest.fixture - @patch("pandasai.connectors.sql.create_engine", autospec=True) - def pgsql_connector(self, create_engine): - # Define your ConnectorConfig instance here - self.config = SQLConnectorConfig( - dialect="mysql", - driver="pymysql", - username="your_username", - password="your_password", - host="your_host", - port=443, - database="your_database", - table="your_table", - where=[["column_name", "=", "value"]], - ).dict() - - # Create an instance of SQLConnector - return PostgreSQLConnector(self.config) - - @pytest.fixture - def smart_dataframe(self, llm, sample_df): - return SmartDataframe(sample_df, config={"llm": llm, "enable_cache": False}) - - @pytest.fixture - def smart_datalake(self, smart_dataframe: SmartDataframe): - return smart_dataframe.lake - - def test_load_llm_with_pandasai_llm(self, smart_datalake: SmartDatalake, llm): - smart_datalake._llm = None - assert smart_datalake._llm is None - - smart_datalake._load_llm(llm) - assert smart_datalake._llm == llm - - def test_load_llm_with_langchain_llm(self, smart_datalake: SmartDatalake, llm): - langchain_llm = OpenAI(openai_api_key="fake_key") - - smart_datalake._llm = None - assert smart_datalake._llm is None - - smart_datalake._load_llm(langchain_llm) - assert smart_datalake._llm._langchain_llm == langchain_llm - - @patch.object( - CodeManager, - "execute_code", - return_value={ - "type": "string", - "value": "There are 10 countries in the dataframe.", - }, - ) - def test_last_result_is_saved(self, _mocked_method, smart_datalake: SmartDatalake): - assert smart_datalake.last_result is None - - _mocked_method.__name__ = "execute_code" - - 
smart_datalake.chat("How many countries are in the dataframe?") - assert smart_datalake.last_result == { - "type": "string", - "value": "There are 10 countries in the dataframe.", - } - - @patch.object( - CodeManager, - "execute_code", - return_value={ - "type": "string", - "value": "There are 10 countries in the dataframe.", - }, - ) - @patch("pandasai.helpers.query_exec_tracker.QueryExecTracker.publish") - def test_query_tracker_publish_called_in_chat_method( - self, mock_query_tracker_publish, _mocked_method, smart_datalake: SmartDatalake - ): - assert smart_datalake.last_result is None - - _mocked_method.__name__ = "execute_code" - - smart_datalake.chat("How many countries are in the dataframe?") - mock_query_tracker_publish.assert_called() - - def test_retry_on_error_with_single_df( - self, smart_datalake: SmartDatalake, smart_dataframe: SmartDataframe - ): - code = """result = 'Hello World'""" - - smart_dataframe._get_sample_head = Mock( - return_value=pd.DataFrame( - { - "country": ["China", "Japan", "Spain"], - "gdp": [654881226, 9009692259, 8446903488], - "happiness_index": [6.66, 7.16, 6.38], - } - ) - ) - - smart_datalake._retry_run_code( - code=code, - e=Exception("Test error"), - ) - - last_prompt = smart_datalake.last_prompt - if sys.platform.startswith("win"): - last_prompt = last_prompt.replace("\r\n", "\n") - - assert ( - last_prompt - == """ -dfs[0]:10x3 -country,gdp,happiness_index -China,654881226,6.66 -Japan,9009692259,7.16 -Spain,8446903488,6.38 - - -The user asked the following question: - - -You generated this python code: -result = 'Hello World' - -It fails with the following error: -Test error - -Fix the python code above and return the new python code:""" # noqa: E501 - ) - - @patch("os.makedirs") - def test_initialize_with_cache(self, mock_makedirs, smart_datalake): - # Modify the smart_datalake's configuration - smart_datalake.config.save_charts = True - smart_datalake.config.enable_cache = True - - # Call the initialize method - 
smart_datalake.initialize() - - # Assertions for enabling cache - cache_dir = os.path.join(os.getcwd(), "cache") - mock_makedirs.assert_any_call( - cache_dir, mode=DEFAULT_FILE_PERMISSIONS, exist_ok=True - ) - - # Assertions for saving charts - charts_dir = os.path.join(os.getcwd(), smart_datalake.config.save_charts_path) - mock_makedirs.assert_any_call( - charts_dir, mode=DEFAULT_FILE_PERMISSIONS, exist_ok=True - ) - - @patch("os.makedirs") - def test_initialize_without_cache(self, mock_makedirs, smart_datalake): - # Modify the smart_datalake's configuration - smart_datalake.config.save_charts = True - smart_datalake.config.enable_cache = False - - # Call the initialize method - smart_datalake.initialize() - - # Assertions for saving charts - charts_dir = os.path.join(os.getcwd(), smart_datalake.config.save_charts_path) - mock_makedirs.assert_called_once_with( - charts_dir, mode=DEFAULT_FILE_PERMISSIONS, exist_ok=True - ) - - def test_validate_true_direct_sql_with_non_connector(self, llm, sample_df): - # raise exception with non connector - SmartDatalake( - [sample_df], - config={"llm": llm, "enable_cache": False, "direct_sql": True}, - ) - - def test_validate_direct_sql_with_connector(self, llm, sql_connector): - # not exception is raised using single connector - SmartDatalake( - [sql_connector], - config={"llm": llm, "enable_cache": False, "direct_sql": True}, - ) - - def test_validate_false_direct_sql_with_connector(self, llm, sql_connector): - # not exception is raised using single connector - SmartDatalake( - [sql_connector], - config={"llm": llm, "enable_cache": False, "direct_sql": False}, - ) - - def test_validate_false_direct_sql_with_two_different_connector( - self, llm, sql_connector, pgsql_connector - ): - # not exception is raised using single connector - SmartDatalake( - [sql_connector, pgsql_connector], - config={"llm": llm, "enable_cache": False, "direct_sql": False}, - ) diff --git a/tests/unit_tests/vectorstores/test_bamboo_vector_store.py 
b/tests/unit_tests/vectorstores/test_bamboo_vector_store.py new file mode 100644 index 000000000..baea40e80 --- /dev/null +++ b/tests/unit_tests/vectorstores/test_bamboo_vector_store.py @@ -0,0 +1,72 @@ +import unittest +from unittest.mock import patch + +from pandasai.vectorstores.bamboo_vectorstore import BambooVectorStore + + +class TestBambooVector(unittest.TestCase): + @patch("pandasai.helpers.request.Session.make_request", autospec=True) + def test_add_question_answer(self, mock_request): + bvs = BambooVectorStore(api_key="dummy_key") + bvs.add_question_answer( + ["What is Chroma?", "How does it work?"], + ["print('Hello')", "for i in range(10): print(i)"], + ) + call_args = mock_request.call_args_list[0][0] + mock_request.assert_called_once() + assert call_args[1] == "POST" + assert call_args[2] == "/training-data" + assert mock_request.call_args_list[0][1] == { + "json": { + "code": ["print('Hello')", "for i in range(10): print(i)"], + "query": ["What is Chroma?", "How does it work?"], + } + } + + @patch("pandasai.helpers.request.Session.make_request", autospec=True) + def test_add_docs(self, mock_request): + bvs = BambooVectorStore(api_key="dummy_key") + bvs.add_docs(["What is Chroma?"]) + call_args = mock_request.call_args_list[0][0] + mock_request.assert_called_once() + assert call_args[1] == "POST" + assert call_args[2] == "/training-docs" + assert mock_request.call_args_list[0][1] == { + "json": {"docs": ["What is Chroma?"]} + } + + @patch("pandasai.helpers.request.Session.make_request", autospec=True) + def test_get_qa(self, mock_request): + bvs = BambooVectorStore(api_key="dummy_key") + bvs.get_relevant_qa_documents("Chroma") + mock_request.assert_called_once() + + @patch("pandasai.helpers.request.Session.make_request", autospec=True) + def test_get_qa_args(self, mock_request): + bvs = BambooVectorStore(api_key="dummy_key") + bvs.get_relevant_qa_documents("Chroma") + call_args = mock_request.call_args_list[0][0] + mock_request.assert_called_once() + 
assert call_args[1] == "GET" + assert call_args[2] == "/training-data/qa/relevant-qa" + assert mock_request.call_args_list[0][1] == { + "params": {"count": 1, "query": "Chroma"} + } + + @patch("pandasai.helpers.request.Session.make_request", autospec=True) + def test_get_docs(self, mock_request): + bvs = BambooVectorStore(api_key="dummy_key") + bvs.get_relevant_docs_documents("Chroma") + mock_request.assert_called_once() + + @patch("pandasai.helpers.request.Session.make_request", autospec=True) + def test_get_docs_args(self, mock_request): + bvs = BambooVectorStore(api_key="dummy_key") + bvs.get_relevant_docs_documents("Chroma") + call_args = mock_request.call_args_list[0][0] + mock_request.assert_called_once() + assert call_args[1] == "GET" + assert call_args[2] == "/training-docs/docs/relevant-docs" + assert mock_request.call_args_list[0][1] == { + "params": {"count": 3, "query": "Chroma"} + } diff --git a/tests/unit_tests/vectorstores/test_chromadb.py b/tests/unit_tests/vectorstores/test_chromadb.py new file mode 100644 index 000000000..fb782f6d6 --- /dev/null +++ b/tests/unit_tests/vectorstores/test_chromadb.py @@ -0,0 +1,285 @@ +import unittest +from unittest.mock import patch + +from pandasai.ee.vectorstores.chroma import ChromaDB + + +class TestChromaDB(unittest.TestCase): + @patch( + "pandasai.ee.vectorstores.chroma.chromadb.api.models.Collection.Collection", + autospec=True, + ) + @patch("pandasai.ee.vectorstores.chroma.chromadb.Client", autospec=True) + def test_add_question_answer(self, mock_client, mock_collection): + mock_client.return_value.get_or_create_collection.return_value = mock_collection + + chroma = ChromaDB() + chroma.add_question_answer( + ["What is Chroma?", "How does it work?"], + ["print('Hello')", "for i in range(10): print(i)"], + ) + mock_collection.add.assert_called_once() + + @patch( + "pandasai.ee.vectorstores.chroma.chromadb.api.models.Collection.Collection", + autospec=True, + ) + 
@patch("pandasai.ee.vectorstores.chroma.chromadb.Client", autospec=True) + def test_add_question_answer_with_ids(self, mock_client, mock_collection): + mock_client.return_value.get_or_create_collection.return_value = mock_collection + + chroma = ChromaDB() + chroma.add_question_answer( + ["What is Chroma?", "How does it work?"], + ["print('Hello')", "for i in range(10): print(i)"], + ["test id 1", "test id 2"], + ) + mock_collection.add.assert_called_once_with( + documents=[ + "Q: What is Chroma?\n A: print('Hello')", + "Q: How does it work?\n A: for i in range(10): print(i)", + ], + metadatas=None, + ids=["test id 1", "test id 2"], + ) + + @patch( + "pandasai.ee.vectorstores.chroma.chromadb.api.models.Collection.Collection", + autospec=True, + ) + @patch("pandasai.ee.vectorstores.chroma.chromadb.Client", autospec=True) + def test_add_question_answer_different_dimensions( + self, mock_client, mock_collection + ): + mock_client.return_value.get_or_create_collection.return_value = mock_collection + + chroma = ChromaDB() + with self.assertRaises(ValueError): + chroma.add_question_answer( + ["What is Chroma?", "How does it work?"], + ["print('Hello')"], + ) + + @patch( + "pandasai.ee.vectorstores.chroma.chromadb.api.models.Collection.Collection", + autospec=True, + ) + @patch("pandasai.ee.vectorstores.chroma.chromadb.Client", autospec=True) + def test_update_question_answer(self, mock_client, mock_collection): + mock_client.return_value.get_or_create_collection.return_value = mock_collection + + chroma = ChromaDB() + chroma.update_question_answer( + ["test id"], + ["What is Chroma?", "How does it work?"], + ["print('Hello')", "for i in range(10): print(i)"], + ) + mock_collection.update.assert_called_once() + + @patch( + "pandasai.ee.vectorstores.chroma.chromadb.api.models.Collection.Collection", + autospec=True, + ) + @patch("pandasai.ee.vectorstores.chroma.chromadb.Client", autospec=True) + def test_update_question_answer_different_dimensions( + self, mock_client, 
mock_collection + ): + mock_client.return_value.get_or_create_collection.return_value = mock_collection + + chroma = ChromaDB() + with self.assertRaises(ValueError): + chroma.update_question_answer( + ["test id"], + ["What is Chroma?", "How does it work?"], + ["print('Hello')"], + ) + + @patch( + "pandasai.ee.vectorstores.chroma.chromadb.api.models.Collection.Collection", + autospec=True, + ) + @patch("pandasai.ee.vectorstores.chroma.chromadb.Client", autospec=True) + def test_add_docs(self, mock_client, mock_collection): + mock_client.return_value.get_or_create_collection.return_value = mock_collection + chroma = ChromaDB() + chroma.add_docs(["Document 1", "Document 2"]) + mock_collection.add.assert_called_once() + + @patch( + "pandasai.ee.vectorstores.chroma.chromadb.api.models.Collection.Collection", + autospec=True, + ) + @patch("pandasai.ee.vectorstores.chroma.chromadb.Client", autospec=True) + def test_add_docs_with_ids(self, mock_client, mock_collection): + mock_client.return_value.get_or_create_collection.return_value = mock_collection + chroma = ChromaDB() + chroma.add_docs(["Document 1", "Document 2"], ["test id 1", "test id 2"]) + mock_collection.add.assert_called_once_with( + documents=["Document 1", "Document 2"], + metadatas=None, + ids=["test id 1", "test id 2"], + ) + + @patch( + "pandasai.ee.vectorstores.chroma.chromadb.api.models.Collection.Collection", + autospec=True, + ) + @patch("pandasai.ee.vectorstores.chroma.chromadb.Client", autospec=True) + def test_delete_question_and_answers(self, mock_client, mock_collection): + mock_client.return_value.get_or_create_collection.return_value = mock_collection + chroma = ChromaDB() + chroma._qa_collection = mock_collection + chroma.delete_question_and_answers(["id1", "id2"]) + mock_collection.delete.assert_called_once_with(ids=["id1", "id2"]) + + @patch( + "pandasai.ee.vectorstores.chroma.chromadb.api.models.Collection.Collection", + autospec=True, + ) + 
@patch("pandasai.ee.vectorstores.chroma.chromadb.Client", autospec=True) + def test_delete_docs(self, mock_client, mock_collection): + mock_client.return_value.get_or_create_collection.return_value = mock_collection + chroma = ChromaDB() + chroma._docs_collection = mock_collection + chroma.delete_docs(["id1", "id2"]) + mock_collection.delete.assert_called_once_with(ids=["id1", "id2"]) + + @patch( + "pandasai.ee.vectorstores.chroma.chromadb.api.models.Collection.Collection", + autospec=True, + ) + @patch("pandasai.ee.vectorstores.chroma.chromadb.Client", autospec=True) + def test_get_relevant_question_answers(self, mock_client, mock_collection): + mock_client.return_value.get_or_create_collection.return_value = mock_collection + chroma = ChromaDB() + chroma._qa_collection = mock_collection + mock_collection.query.return_value = { + "documents": [["Document 1", "Document 2", "Document 3"]], + "distances": [[0.5, 0.8, 1.0]], + "metadatas": [[None, None, None]], + "ids": [["test id1", "test id2", "test id3"]], + } + result = chroma.get_relevant_question_answers("What is Chroma?", k=3) + self.assertEqual( + result, + { + "documents": [["Document 1", "Document 2", "Document 3"]], + "distances": [[0.5, 0.8, 1.0]], + "metadatas": [[None, None, None]], + "ids": [["test id1", "test id2", "test id3"]], + }, + ) + + @patch( + "pandasai.ee.vectorstores.chroma.chromadb.api.models.Collection.Collection", + autospec=True, + ) + @patch("pandasai.ee.vectorstores.chroma.chromadb.Client", autospec=True) + def test_get_relevant_question_answers_by_ids(self, mock_client, mock_collection): + mock_client.return_value.get_or_create_collection.return_value = mock_collection + chroma = ChromaDB() + chroma._qa_collection = mock_collection + mock_collection.get.return_value = { + "documents": [["Document 1", "Document 2", "Document 3"]], + "metadatas": [[None, None, None]], + "ids": [["test id1", "test id2", "test id3"]], + } + result = chroma.get_relevant_question_answers_by_id( + ["test 
id1", "test id2", "test id3"] + ) + self.assertEqual( + result, + { + "documents": [["Document 1", "Document 2", "Document 3"]], + "metadatas": [[None, None, None]], + "ids": [["test id1", "test id2", "test id3"]], + }, + ) + + @patch( + "pandasai.ee.vectorstores.chroma.chromadb.api.models.Collection.Collection", + autospec=True, + ) + @patch("pandasai.ee.vectorstores.chroma.chromadb.Client", autospec=True) + def test_get_relevant_docs(self, mock_client, mock_collection): + mock_client.return_value.get_or_create_collection.return_value = mock_collection + chroma = ChromaDB() + chroma._docs_collection = mock_collection + mock_collection.query.return_value = { + "documents": [["Document 1", "Document 2", "Document 3"]], + "distances": [[0.5, 0.8, 1.0]], + "metadatas": [[None, None, None]], + "ids": [["test id1", "test id2", "test id3"]], + } + result = chroma.get_relevant_docs("What is Chroma?", k=3) + self.assertEqual( + result, + { + "documents": [["Document 1", "Document 2", "Document 3"]], + "distances": [[0.5, 0.8, 1.0]], + "metadatas": [[None, None, None]], + "ids": [["test id1", "test id2", "test id3"]], + }, + ) + + @patch( + "pandasai.ee.vectorstores.chroma.chromadb.api.models.Collection.Collection", + autospec=True, + ) + @patch("pandasai.ee.vectorstores.chroma.chromadb.Client", autospec=True) + def test_get_relevant_docs_by_id(self, mock_client, mock_collection): + mock_client.return_value.get_or_create_collection.return_value = mock_collection + chroma = ChromaDB() + chroma._docs_collection = mock_collection + mock_collection.get.return_value = { + "documents": [["Document 1", "Document 2", "Document 3"]], + "metadatas": [[None, None, None]], + "ids": [["test id1", "test id2", "test id3"]], + } + result = chroma.get_relevant_docs_by_id(["test id1", "test id2", "test id3"]) + self.assertEqual( + result, + { + "documents": [["Document 1", "Document 2", "Document 3"]], + "metadatas": [[None, None, None]], + "ids": [["test id1", "test id2", "test id3"]], + }, 
+ ) + + @patch( + "pandasai.ee.vectorstores.chroma.chromadb.api.models.Collection.Collection", + autospec=True, + ) + @patch("pandasai.ee.vectorstores.chroma.chromadb.Client", autospec=True) + def test_get_relevant_question_answers_documents( + self, mock_client, mock_collection + ): + mock_client.return_value.get_or_create_collection.return_value = mock_collection + chroma = ChromaDB() + chroma._qa_collection = mock_collection + mock_collection.query.return_value = { + "documents": [["Document 1", "Document 2", "Document 3"]], + "distances": [[0.5, 0.8, 1.0]], + "metadatas": [[None, None, None]], + "ids": [["test id1", "test id2", "test id3"]], + } + result = chroma.get_relevant_qa_documents("What is Chroma?", k=3) + self.assertEqual(result, ["Document 1", "Document 2", "Document 3"]) + + @patch( + "pandasai.ee.vectorstores.chroma.chromadb.api.models.Collection.Collection", + autospec=True, + ) + @patch("pandasai.ee.vectorstores.chroma.chromadb.Client", autospec=True) + def test_get_relevant_docs_documents(self, mock_client, mock_collection): + mock_client.return_value.get_or_create_collection.return_value = mock_collection + chroma = ChromaDB() + chroma._qa_collection = mock_collection + mock_collection.query.return_value = { + "documents": [["Document 1", "Document 2", "Document 3"]], + "distances": [[0.5, 0.8, 1.0]], + "metadatas": [[None, None, None]], + "ids": [["test id1", "test id2", "test id3"]], + } + result = chroma.get_relevant_docs_documents("What is Chroma?", k=3) + self.assertEqual(result, ["Document 1", "Document 2", "Document 3"])