Simplify acceptance tests #106

Open · wants to merge 2 commits into base: main
README-template.md (25 additions, 24 deletions)
@@ -40,7 +40,11 @@ To build the connector jar:

### Prerequisite

Make sure you have the BigQuery Storage API enabled in your GCP project. Follow [these instructions](https://cloud.google.com/bigquery/docs/reference/storage/#enabling_the_api).
Enable the BigQuery Storage API for your project:

```sh
gcloud services enable bigquerystorage.googleapis.com
```
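If you want to confirm that the API is now enabled, one quick optional check (not part of the original instructions) is to list the enabled services and filter the output:

```sh
# List enabled services and look for the BigQuery Storage API
gcloud services list --enabled | grep bigquerystorage
```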

### Option 1: connectors init action

@@ -614,19 +618,19 @@
There are multiple options to override the default behavior and to provide custom impersonation
for specific users, specific groups, or for all users that run the Hive query by default using
the below properties:

- `bq.impersonation.service.account.for.user.<USER_NAME>` (not set by default)

  The service account to be impersonated for a specific user. You can specify multiple
  properties using that pattern for multiple users.

- `bq.impersonation.service.account.for.group.<GROUP_NAME>` (not set by default)

  The service account to be impersonated for a specific group. You can specify multiple
  properties using that pattern for multiple groups.

- `bq.impersonation.service.account` (not set by default)

  Default service account to be impersonated for all users.

If any of the above properties are set, then the specified service account will be impersonated by
generating short-lived credentials when accessing BigQuery.
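As an illustration only (not taken from this README), these are regular Hive configuration properties, so they could for example be passed on the command line; the JDBC URL and service account names below are placeholders:

```sh
# Hypothetical example: impersonate a dedicated service account for user "alice"
# and fall back to a default service account for everyone else.
beeline -u "jdbc:hive2://localhost:10000" \
  --hiveconf bq.impersonation.service.account.for.user.alice=alice-sa@my-project.iam.gserviceaccount.com \
  --hiveconf bq.impersonation.service.account=default-sa@my-project.iam.gserviceaccount.com
```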
@@ -711,7 +715,7 @@ export PROJECT=my-gcp-project
export BIGLAKE_LOCATION=us
export BIGLAKE_REGION=us-central1
export BIGLAKE_CONNECTION=hive-integration-tests
export BIGLAKE_BUCKET=${USER}-biglake-test
export BIGLAKE_BUCKET=${PROJECT}-biglake-tests
Collaborator comment: Many projects have a `:` in their names, e.g., `google.com:project`, which makes this default bucket name invalid. At least, consider automatically converting `:` to `_` or `-` here.

```
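One possible way to follow the suggestion above, assuming a POSIX shell, is to sanitize the project ID before deriving the bucket name (a sketch only, not part of the PR):

```sh
# Replace ':' (invalid in bucket names, e.g. "google.com:project") with '_'
export BIGLAKE_BUCKET="$(echo "${PROJECT}" | tr ':' '_')-biglake-tests"
```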

Create the test BigLake connection:
@@ -773,29 +777,26 @@ You must use Java version 8, as it's the version that Hive itself uses.

Acceptance tests create Dataproc clusters with the connector and run jobs to verify it.

The following environment variables must be set and **exported** first.

* `GOOGLE_APPLICATION_CREDENTIALS` - the full path to a credentials JSON, either a service account or the result of a
`gcloud auth login` run
* `GOOGLE_CLOUD_PROJECT` - The Google Cloud Platform project used to test the connector
* `TEST_BUCKET` - The GCS bucket used to test writing to BigQuery during the integration tests
* `ACCEPTANCE_TEST_BUCKET` - The GCS bucket used to test writing to BigQuery during the acceptance tests

To run the acceptance tests:

```sh
./mvnw verify -Pdataproc21,acceptance
```
1. Enable the Dataproc API for your project:
   ```sh
   gcloud services enable dataproc.googleapis.com
   ```
2. Run the tests:
   ```sh
   ./mvnw verify -Pdataproc21,acceptance
   ```
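Based on the new defaults introduced in `AcceptanceTestUtils` and `AcceptanceTestConstants` later in this PR, the project and bucket used by the acceptance tests can still be overridden through environment variables; a sketch with placeholder values:

```sh
# Optional overrides; without them the ADC quota project and a
# "<project>-acceptance-tests" bucket are used.
export GOOGLE_CLOUD_PROJECT=my-gcp-project
export ACCEPTANCE_BUCKET=my-acceptance-bucket
./mvnw verify -Pdataproc21,acceptance
```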

If you want to avoid rebuilding `shaded-dependencies` and `shaded-test-dependencies` when there are no changes in these
If you want to avoid rebuilding `shaded-dependencies` and `shaded-acceptance-tests-dependencies` when there are no changes in these
modules, you can break it down into several steps, and only rerun the necessary steps:

```sh
# Install hive-bigquery-parent/pom.xml to Maven local repo
mvn install:install-file -Dpackaging=pom -Dfile=hive-bigquery-parent/pom.xml -DpomFile=hive-bigquery-parent/pom.xml

# Build and install shaded-dependencies and shaded-test-dependencies jars to Maven local repo
mvn clean install -pl shaded-dependencies,shaded-test-dependencies -Pdataproc21 -DskipTests
# Build and install shaded-deps-dataproc21 and shaded-acceptance-tests-dependencies jars to Maven local repo
mvn clean install -pl shaded-deps-dataproc21,shaded-acceptance-tests-dependencies -Pdataproc21 -DskipTests

# Build and test connector
mvn clean verify -pl connector -Pdataproc21,acceptance
README.md (24 additions, 23 deletions)
@@ -40,7 +40,11 @@ To build the connector jar:

### Prerequisite

Make sure you have the BigQuery Storage API enabled in your GCP project. Follow [these instructions](https://cloud.google.com/bigquery/docs/reference/storage/#enabling_the_api).
Enable the BigQuery Storage API for your project:

```sh
gcloud services enable bigquerystorage.googleapis.com
```

### Option 1: connectors init action

@@ -614,19 +618,19 @@
There are multiple options to override the default behavior and to provide custom impersonation
for specific users, specific groups, or for all users that run the Hive query by default using
the below properties:

- `bq.impersonation.service.account.for.user.<USER_NAME>` (not set by default)

  The service account to be impersonated for a specific user. You can specify multiple
  properties using that pattern for multiple users.

- `bq.impersonation.service.account.for.group.<GROUP_NAME>` (not set by default)

  The service account to be impersonated for a specific group. You can specify multiple
  properties using that pattern for multiple groups.

- `bq.impersonation.service.account` (not set by default)

  Default service account to be impersonated for all users.

If any of the above properties are set, then the specified service account will be impersonated by
generating short-lived credentials when accessing BigQuery.
@@ -773,29 +777,26 @@ You must use Java version 8, as it's the version that Hive itself uses.

Acceptance tests create Dataproc clusters with the connector and run jobs to verify it.

The following environment variables must be set and **exported** first.

* `GOOGLE_APPLICATION_CREDENTIALS` - the full path to a credentials JSON, either a service account or the result of a
  `gcloud auth login` run
* `GOOGLE_CLOUD_PROJECT` - The Google Cloud Platform project used to test the connector
* `TEST_BUCKET` - The GCS bucket used to test writing to BigQuery during the integration tests
* `ACCEPTANCE_TEST_BUCKET` - The GCS bucket used to test writing to BigQuery during the acceptance tests

Collaborator comment: Why are they removed?

To run the acceptance tests:

```sh
./mvnw verify -Pdataproc21,acceptance
```
1. Enable the Dataproc API for your project:
   ```sh
   gcloud services enable dataproc.googleapis.com
   ```
2. Run the tests:
   ```sh
   ./mvnw verify -Pdataproc21,acceptance
   ```

If you want to avoid rebuilding `shaded-dependencies` and `shaded-test-dependencies` when there are no changes in these
If you want to avoid rebuilding `shaded-dependencies` and `shaded-acceptance-tests-dependencies` when there are no changes in these
modules, you can break it down into several steps, and only rerun the necessary steps:

```sh
# Install hive-bigquery-parent/pom.xml to Maven local repo
mvn install:install-file -Dpackaging=pom -Dfile=hive-bigquery-parent/pom.xml -DpomFile=hive-bigquery-parent/pom.xml

# Build and install shaded-dependencies and shaded-test-dependencies jars to Maven local repo
mvn clean install -pl shaded-dependencies,shaded-test-dependencies -Pdataproc21 -DskipTests
# Build and install shaded-deps-dataproc21 and shaded-acceptance-tests-dependencies jars to Maven local repo
mvn clean install -pl shaded-deps-dataproc21,shaded-acceptance-tests-dependencies -Pdataproc21 -DskipTests

# Build and test connector
mvn clean verify -pl connector -Pdataproc21,acceptance
cloudbuild/presubmit.sh (1 addition, 1 deletion)
@@ -27,7 +27,7 @@ readonly ACTION=$1
readonly PROFILES="dataproc21"
readonly MVN="./mvnw -B -e -Dmaven.repo.local=/workspace/.repository"

export TEST_BUCKET=dataproc-integ-tests
export INTEGRATION_BUCKET=dataproc-integ-tests
export BIGLAKE_BUCKET=dataproc-integ-tests
export BIGLAKE_CONNECTION=hive-integration-tests

@@ -46,7 +46,7 @@ public class TestUtils {
public static final String MANAGED_TEST_TABLE_NAME = "managed_test";
public static final String FIELD_TIME_PARTITIONED_TABLE_NAME = "field_time_partitioned";
public static final String INGESTION_TIME_PARTITIONED_TABLE_NAME = "ingestion_time_partitioned";
public static final String TEST_BUCKET_ENV_VAR = "TEST_BUCKET";
public static final String INTEGRATION_BUCKET_ENV_VAR = "INTEGRATION_BUCKET";
Collaborator comment: You might want to document this env variable in README with its default value.
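Following up on the comment above, a minimal way to document and override this variable, assuming the default used by `getIntegrationTestBucket()` further down in this file:

```sh
# Optional; when unset, the tests fall back to "<project>-integration-tests"
export INTEGRATION_BUCKET=my-integration-test-bucket
```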


// The BigLake bucket and connection must be created before running the tests.
// Also, the connection's service account must be given permission to access the bucket.
@@ -211,8 +211,9 @@ public static String getBigLakeBucket() {
* Returns the name of the bucket used to store temporary Avro files when testing the indirect
* write method. This bucket is created automatically when running the tests.
*/
public static String getTestBucket() {
return System.getenv().getOrDefault(TEST_BUCKET_ENV_VAR, getProject() + "-integration-tests");
public static String getIntegrationTestBucket() {
return System.getenv()
.getOrDefault(INTEGRATION_BUCKET_ENV_VAR, getProject() + "-integration-tests");
}

public static void createBqDataset(String dataset) {
@@ -269,7 +270,15 @@ private static Storage getStorageClient() {
}

public static void createBucket(String bucketName) {
getStorageClient().create(BucketInfo.newBuilder(bucketName).setLocation(LOCATION).build());
try {
getStorageClient().create(BucketInfo.newBuilder(bucketName).setLocation(LOCATION).build());
} catch (StorageException e) {
if (e.getCode() == 409) {
// The bucket already exists, which is okay.
return;
}
throw e;
}
}

public static void uploadBlob(String bucketName, String objectName, byte[] contents) {
@@ -15,17 +15,13 @@
*/
package com.google.cloud.hive.bigquery.connector.acceptance;

import com.google.common.base.Preconditions;
import org.apache.parquet.Strings;

public class AcceptanceTestConstants {

public static final String REGION = "us-west1";
public static final String DATAPROC_ENDPOINT = REGION + "-dataproc.googleapis.com:443";
public static final String PROJECT_ID =
Preconditions.checkNotNull(
System.getenv("GOOGLE_CLOUD_PROJECT"),
"Please set the 'GOOGLE_CLOUD_PROJECT' environment variable");
public static final String ACCEPTANCE_BUCKET_ENV_VAR = "ACCEPTANCE_BUCKET";
Collaborator comment: ditto (i.e., document this env variable in the README with its default value).


public static final boolean CLEAN_UP_CLUSTER =
Strings.isNullOrEmpty(System.getenv("CLEAN_UP_CLUSTER"))
@@ -15,6 +15,7 @@
*/
package com.google.cloud.hive.bigquery.connector.acceptance;

import com.google.auth.oauth2.GoogleCredentials;
import com.google.cloud.WriteChannel;
import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.BigQuery.DatasetDeleteOption;
@@ -75,7 +76,6 @@ public String getMarker() {
}

// must be set in order to run the acceptance test
static final String BUCKET = System.getenv("ACCEPTANCE_TEST_BUCKET");
private static final BigQuery bq = BigQueryOptions.getDefaultInstance().getService();

static Storage storage =
@@ -163,8 +163,27 @@ public static BlobId uploadToGcs(ByteBuffer content, String destinationUri, String
return blobId;
}

public static String createTestBaseGcsDir(String testId) {
return String.format("gs://%s/hivebq-tests/%s", BUCKET, testId);
public static String getAcceptanceProject() {
String project = System.getenv("GOOGLE_CLOUD_PROJECT");
if (project != null) {
return project;
}
try {
return GoogleCredentials.getApplicationDefault().getQuotaProjectId();
} catch (IOException e) {
throw new RuntimeException(e);
}
}
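When `GOOGLE_CLOUD_PROJECT` is unset, the fallback above reads the quota project from Application Default Credentials; assuming the `gcloud` CLI is used for ADC, that value can be set as follows (placeholder project ID):

```sh
# Write the quota project into the local ADC file so getQuotaProjectId() returns it
gcloud auth application-default set-quota-project my-gcp-project
```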

public static String getAcceptanceTestBucket() {
return System.getenv()
.getOrDefault(
AcceptanceTestConstants.ACCEPTANCE_BUCKET_ENV_VAR,
AcceptanceTestUtils.getAcceptanceProject() + "-acceptance-tests");
}

public static String getTestBaseGcsDir(String testId) {
return String.format("gs://%s/hivebq-tests/%s", getAcceptanceTestBucket(), testId);
}

public static Blob getBlob(String gcsDirUri, String fileSuffix) throws URISyntaxException {