Skip to content

Commit

Permalink
feat(google_bigquery): load google big query from base64 string of cr… (
Browse files Browse the repository at this point in the history
Sinaptik-AI#1008)

* feat(google_bigquery): load google big query from base64 string of credentials

* tests: add test case for base64 string

* fix: fix linter

---------

Co-authored-by: Gabriele Venturi <[email protected]>
  • Loading branch information
ArslanSaleem and gventuri authored Mar 9, 2024
1 parent cf61cde commit bf3392f
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 9 deletions.
37 changes: 34 additions & 3 deletions examples/from_googlebigquery.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import base64
import json

from pandasai import SmartDataframe

# A license might be required for using Google BigQuery with PandasAI
Expand All @@ -9,17 +12,45 @@
# KEYFILE_PATH
# PROJECT_ID


# EXAMPLE 1
bigquery_connectors = GoogleBigQueryConnector(
config={
"credentials_path": "/Users/arslan/Downloads/loan-project.json",
"database": "loan_payments",
"table": "loan_payments",
"projectID": "loan-project",
"where": [["Gender", "=", "female"]],
}
)

llm = OpenAI("OPEN-API_KEY")
df = SmartDataframe(bigquery_connectors, config={"llm": llm})

response = df.chat("How many rows are there in data ?")
print(response)

# EXAMPLE 2
# Initialize Google BigQuery using a Base64-encoded credentials string
# Read the service-account key file and parse it as JSON.
with open("/Users/arslan/Downloads/loan-project.json", "r") as file:
json_data = json.load(file)

# Serialize the credentials back to JSON text, then Base64-encode its UTF-8 bytes.
json_string = json.dumps(json_data, indent=2)
# NOTE(review): base64.b64encode returns `bytes`, while the connector config
# declares `credentials_base64` as `str` — presumably config validation coerces
# it; confirm, or decode to `str` before passing it on.
encoded_bytes = base64.b64encode(json_string.encode("utf-8"))


bigquery_connectors = GoogleBigQueryConnector(
config={
"credentials_path": "credentials.json",
"credentials_base64": encoded_bytes,
"database": "loan_payments",
"table": "loan_payments",
"projectID": "project_id",
"projectID": "loan-project",
"where": [["Gender", "=", "female"]],
}
)

llm = OpenAI("OPEN_AI_KEY")
llm = OpenAI("OPEN-API_KEY")
df = SmartDataframe(bigquery_connectors, config={"llm": llm})

response = df.chat("How many rows are there in data ?")
Expand Down
24 changes: 18 additions & 6 deletions pandasai/ee/connectors/google_big_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

from sqlalchemy import create_engine

from pandasai.exceptions import InvalidConfigError

from ...connectors.base import BaseConnectorConfig
from ...connectors.sql import SQLBaseConnectorConfig, SQLConnector

Expand All @@ -16,7 +18,8 @@ class GoogleBigQueryConnectorConfig(SQLBaseConnectorConfig):
Connector configuration for big query.
"""

credentials_path: str
credentials_path: str = None
credentials_base64: str = None
database: str
table: str
projectID: str
Expand All @@ -43,6 +46,11 @@ def __init__(self, config: Union[GoogleBigQueryConnectorConfig, dict]):
}
config = self._populate_config_from_env(config, env_vars)

if "credentials_base64" not in config and "credentials_path" not in config:
raise InvalidConfigError(
"credentials_path or credentials_base64 is needed to connect"
)

super().__init__(config)

def _load_connector_config(self, config: Union[BaseConnectorConfig, dict]):
Expand All @@ -56,11 +64,15 @@ def _init_connection(self, config: GoogleBigQueryConnectorConfig):
config (GoogleBigQueryConnectorConfig): Configurations to load database
"""

self._engine = create_engine(
f"{config.dialect}://{config.projectID}/{config.database}",
credentials_path=config.credentials_path,
)
if config.credentials_path:
self._engine = create_engine(
f"{config.dialect}://{config.projectID}/{config.database}",
credentials_path=config.credentials_path,
)
else:
self._engine = create_engine(
f"{config.dialect}://{config.projectID}/{config.database}?credentials_base64={config.credentials_base64}"
)

self._connection = self._engine.connect()

Expand Down
20 changes: 20 additions & 0 deletions tests/unit_tests/connectors/test_google_big_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,26 @@ def test_constructor_and_properties(
mock_load_connector_config.assert_called()
mock_init_connection.assert_called()

# Verify that a connector configured with `credentials_base64` (and no
# `credentials_path`) creates its engine from a URL that carries the credentials
# as a `credentials_base64` query parameter.
# `create_engine` is patched under the connector module's name, so the test
# never builds a real engine or opens a real connection.
@patch("pandasai.ee.connectors.google_big_query.create_engine", autospec=True)
def test_constructor_and_properties_with_base64_string(self, mock_create_engine):
# Stub engine whose connect() returns a stub connection.
self.mock_engine = Mock()
self.mock_connection = Mock()
self.mock_engine.connect.return_value = self.mock_connection
mock_create_engine.return_value = self.mock_engine

# Build the config without `credentials_path` and pass it on as a plain dict.
self.config = GoogleBigQueryConnectorConfig(
dialect="bigquery",
database="database",
table="yourtable",
credentials_base64="base64_str",
projectID="project_id",
).dict()

self.connector = GoogleBigQueryConnector(self.config)
# The engine URL must be the only argument: dialect://projectID/database
# followed by the Base64 credentials in the query string.
mock_create_engine.assert_called_with(
"bigquery://project_id/database?credentials_base64=base64_str"
)

def test_repr_method(self):
# Test __repr__ method
expected_repr = (
Expand Down

0 comments on commit bf3392f

Please sign in to comment.