IntegriChain1 · jacobtobias · Apr 17, 2021 · Apr 17, 2021 · Apr 27, 2021 · Apr 27, 2021
diff --git a/README.md b/README.md
@@ -77,6 +77,12 @@ A lot of pre-filtering involves trimming down your dataset based on the values a
                                                 key,
                                                 partition)
 
+## Running the Tests
+```
+bash dev_env --build
+pytest tests/
+```
+
 ## Redshift Spectrum
 Dataframes published to S3 can optionally be queried in AWS Redshift Spectrum. To enable this functionality, you must have an external database configured in Redshift. See the [AWS docs](https://docs.aws.amazon.com/redshift/latest/dg/c-using-spectrum.html) for help setting up a database in Redshift. To enable this functionality in S3parq, simply pass a dictionary of configurations to `publish()` via the redshift_params argument.
 

diff --git a/requirements.txt b/requirements.txt
@@ -1,7 +1,7 @@
-pandas==0.24.2
-pyarrow==0.13.0
-boto3==1.9.177
-s3fs==0.2.1
+pandas==1.2.4
+pyarrow==4.0.0
+boto3==1.17.58
+s3fs==0.4.2
 dfmock==0.0.14
 moto==1.3.8
 psycopg2==2.8.3

diff --git a/s3parq/publish_parq.py b/s3parq/publish_parq.py
@@ -459,7 +459,7 @@ def log_size_estimate(num_bytes):
         yield {'lower': lower, 'upper': upper}
 
 
-def publish(bucket: str, key: str, partitions: List[str], dataframe: pd.DataFrame, redshift_params: dict = None) -> List[str]:
+def publish(bucket: str, key: str, partitions: List[str], dataframe: pd.DataFrame, redshift_params: dict = None, read_access_user: str =None) -> List[str]:
     """ Dataframe to S3 Parquet Publisher
     This function handles the portion of work that will see a dataframe converted
     to parquet and then published to the given S3 location.
@@ -513,7 +513,7 @@ def publish(bucket: str, key: str, partitions: List[str], dataframe: pd.DataFram
 
         session_helper.configure_session_helper()
         publish_redshift.create_schema(
-            redshift_params['schema_name'], redshift_params['db_name'], redshift_params['iam_role'], session_helper)
+            redshift_params['schema_name'], redshift_params['db_name'], redshift_params['iam_role'], session_helper, read_access_user)
         logger.debug(
             f"Schema {redshift_params['schema_name']} created. Creating table {redshift_params['table_name']}...")
 
@@ -553,7 +553,7 @@ def publish(bucket: str, key: str, partitions: List[str], dataframe: pd.DataFram
 
     return files
 
-def custom_publish(bucket: str, key: str, partitions: List[str], dataframe: pd.DataFrame, custom_redshift_columns: dict, redshift_params: dict = None) -> List[str]:
+def custom_publish(bucket: str, key: str, partitions: List[str], dataframe: pd.DataFrame, custom_redshift_columns: dict, redshift_params: dict = None, read_access_user: str =None) -> List[str]:
     """ Dataframe to S3 Parquet Publisher with a CUSTOM redshift column definition.
     Custom publish allows custom defined redshift column definitions to be used and 
     enables support for Redshift's decimal data type. 
@@ -618,7 +618,7 @@ def custom_publish(bucket: str, key: str, partitions: List[str], dataframe: pd.D
 
         session_helper.configure_session_helper()
         publish_redshift.create_schema(
-            redshift_params['schema_name'], redshift_params['db_name'], redshift_params['iam_role'], session_helper)
+            redshift_params['schema_name'], redshift_params['db_name'], redshift_params['iam_role'], session_helper, read_access_user)
         logger.debug(
             f"Schema {redshift_params['schema_name']} created. Creating table {redshift_params['table_name']}...")
 

diff --git a/s3parq/publish_redshift.py b/s3parq/publish_redshift.py
@@ -178,7 +178,7 @@ def _datatype_mapper(columns: dict) -> dict:
     return f"({sql_statement[:-2]})"  # Slice off the last space and comma
 
 
-def create_schema(schema_name: str, db_name: str, iam_role: str, session_helper: SessionHelper) -> None:
+def create_schema(schema_name: str, db_name: str, iam_role: str, session_helper: SessionHelper, read_access_user=None) -> None:
     """ Creates a schema in AWS redshift using a given iam_role
 
     Args:
@@ -198,6 +198,13 @@ def create_schema(schema_name: str, db_name: str, iam_role: str, session_helper:
         logger.info(f'Running query to create schema: {new_schema_query}')
         scope.execute(new_schema_query)
 
+        if read_access_user:
+            grant_access_query = f"GRANT USAGE ON SCHEMA {schema_name} TO {read_access_user};\
+                GRANT SELECT ON ALL TABLES IN SCHEMA {schema_name} TO {read_access_user};\
+                ALTER DEFAULT PRIVILEGES IN SCHEMA {schema_name} GRANT SELECT ON TABLES TO {read_access_user};"
+            logger.info(f'Running query to grant access to schema: {grant_access_query}')
+            scope.execute(grant_access_query)
+
 
 def create_table(table_name: str, schema_name: str, columns: dict, partitions: dict, path: str, session_helper: SessionHelper) -> None:
     """ Creates a table in AWS redshift. The table will be named 

diff --git a/tests/test_publish_parq.py b/tests/test_publish_parq.py
@@ -285,7 +285,7 @@ def test_schema_publish(self, mock_session_helper, mock_create_schema):
                      dataframe=dataframe, partitions=partitions, redshift_params=redshift_params)
 
         mock_create_schema.assert_called_once_with(
-            redshift_params['schema_name'], redshift_params['db_name'], redshift_params['iam_role'], msh)
+            redshift_params['schema_name'], redshift_params['db_name'], redshift_params['iam_role'], msh, None)
 
     @patch('s3parq.publish_redshift.create_table')
     @patch('s3parq.publish_parq.SessionHelper')
@@ -375,7 +375,7 @@ def test_custom_publish_schema_publish(self, mock_session_helper, mock_create_sc
                      custom_redshift_columns=custom_redshift_columns)
 
         mock_create_schema.assert_called_once_with(
-            redshift_params['schema_name'], redshift_params['db_name'], redshift_params['iam_role'], msh)
+            redshift_params['schema_name'], redshift_params['db_name'], redshift_params['iam_role'], msh, None)
 
     @patch('s3parq.publish_redshift.create_custom_table')
     @patch('s3parq.publish_parq.SessionHelper')