From 0762d72ad0ab0b3431305e0e8dafe5244bb945b1 Mon Sep 17 00:00:00 2001 From: Marti Motoyama Date: Wed, 6 Nov 2024 19:43:16 +0000 Subject: [PATCH] Add ability to set dataset project id This CL adds the ability to specify a dataset project id as part of the dataset field in the connection string used to create a BQConnection instance. Previously, we used the project defined in the URL's path component for both the billing and default dataset projects. Recall that the default project and dataset are used to disambiguate unqualified table names referenced in a query being processed by a connection. Now, we parse the potentially fully qualified dataset to check for a project id. If no project id is found, we revert back to our former behavior, using the billing project as the default dataset project. --- README.md | 1 + .../starschema/clouddb/jdbc/BQConnection.java | 90 +++- .../clouddb/jdbc/BQPreparedStatement.java | 1 + .../starschema/clouddb/jdbc/BQStatement.java | 1 + .../clouddb/jdbc/BQStatementRoot.java | 2 + .../clouddb/jdbc/BQSupportFuncts.java | 53 +- .../starschema/clouddb/jdbc/JdbcUrlTest.java | 502 +++++++++++++++++- .../clouddb/jdbc/PreparedStatementTests.java | 55 +- 8 files changed, 680 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index a9989291..6697d076 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,7 @@ For instance for a _service account_ in a properties file: ```ini projectid=disco-parsec-659 +dataset=publicdata.samples type=service user=abc123e@developer.gserviceaccount.com password=bigquery_credentials.p12 diff --git a/src/main/java/net/starschema/clouddb/jdbc/BQConnection.java b/src/main/java/net/starschema/clouddb/jdbc/BQConnection.java index 783609c0..e80161c0 100644 --- a/src/main/java/net/starschema/clouddb/jdbc/BQConnection.java +++ b/src/main/java/net/starschema/clouddb/jdbc/BQConnection.java @@ -22,6 +22,7 @@ import com.google.api.client.http.HttpTransport; import com.google.api.services.bigquery.Bigquery; +import 
com.google.api.services.bigquery.model.DatasetReference; import com.google.common.base.Splitter; import java.io.IOException; import java.io.UnsupportedEncodingException; @@ -42,6 +43,13 @@ * @author Gunics Balázs, Horváth Attila */ public class BQConnection implements Connection { + + // We permit using either a "." or ":" as the delimiter between the dataset and project ids. + private static final String PROJECT_DELIMITERS = ":."; + // The following regex uses a lookahead to match the last occurrence of a project delimiter. + private static final String LAST_PROJECT_DELIMITER_REGEX = + "[" + PROJECT_DELIMITERS + "](?=[^" + PROJECT_DELIMITERS + "]*$)"; + /** Variable to store auto commit mode */ private boolean autoCommitEnabled = false; @@ -50,10 +58,19 @@ public class BQConnection implements Connection { /** The bigquery client to access the service. */ private Bigquery bigquery = null; + /** The default dataset id to configure on queries processed by this connection. */ private String dataset = null; - /** The ProjectId for the connection */ - private String projectId = null; + /** + * The default dataset project id to configure on queries processed by this connection. + * + *

We follow the same naming convention as the corresponding system variable @@dataset_project_id. + */ + private String datasetProjectId = null; + + /** The ProjectId to use for billing on queries processed by this connection. */ + private final String projectId; /** Boolean to determine if the Connection is closed */ private boolean isclosed = false; @@ -152,7 +169,7 @@ public BQConnection(String url, Properties loginProp, HttpTransport httpTranspor if (matchData.find()) { this.projectId = CatalogName.toProjectId(matchData.group(1)); - this.dataset = matchData.group(2); + configureDataSet(matchData.group(2)); } else { this.projectId = CatalogName.toProjectId(pathParams); } @@ -415,6 +432,19 @@ public void close() throws SQLException { } } + /** + * Parses the input dataset expression and sets the values for the dataset and datasetProjectId + * instance variables. + */ + private void configureDataSet(String datasetExpr) { + DatasetReference datasetRef = parseDatasetRef(datasetExpr, this.projectId); + this.dataset = datasetRef.getDatasetId(); + this.datasetProjectId = datasetRef.getProjectId(); + } + + /** + * Returns the default dataset that should be configured on queries processed by this connection. + */ public String getDataSet() { return this.dataset; } @@ -577,7 +607,7 @@ public Struct createStruct(String typeName, Object[] attributes) throws SQLExcep @Override public void setSchema(String schema) { - this.dataset = schema; + configureDataSet(schema); } @Override @@ -694,11 +724,19 @@ public DatabaseMetaData getMetaData() throws SQLException { return metadata; } - /** Getter method for projectId */ + /** Getter method for the projectId to use for billing. */ public String getProjectId() { return projectId; } + /** + * Returns the default dataset project id that should be configured on queries processed by this + * connection. 
+ */ + public String getDataSetProjectId() { + return this.datasetProjectId; + } + /** * * @@ -1258,4 +1296,46 @@ public Integer getTimeoutMs() { public JobCreationMode getJobCreationMode() { return jobCreationMode; } + + /** + * Returns a DatasetReference extracted from the input dataset expression, which may optionally + * include a project id reference. + * + *

This method parses the dataset expression into discrete components, using either a dot ('.') + * or colon (':') as the delimiter between the dataset and project identifiers. We split the + * string on the last occurrence of either delimiter, and we use the length of the split array to + * determine whether the input contains a project id reference. If the {@code datasetExpr} is + * null, we return a DatasetReference with its project id set to the {@code defaultProjectId} + * argument and its dataset id set to null. + * + *

We don't perform any validation on the result; while there are well-defined rules regarding + * project + * ids and dataset + * names, we must handle references to both that don't adhere to the documented requirements + * (such as having a domain name with a colon as part of the project id). Rather than deal with + * the various corner cases for each type of identifier, we defer to the BigQuery API to validate + * the default dataset configured on queries. + * + *

Visible for testing. + * + * @param datasetExpr Dataset expression, generally taken from the BQJDBC connection string. Can + * be null. + * @param defaultProjectId Project id to set on the returned DatasetReference if no project id is + * found in {@code datasetExpr}. + * @return DatasetReference + */ + static DatasetReference parseDatasetRef(String datasetExpr, String defaultProjectId) { + if (datasetExpr == null) { + return new DatasetReference().setDatasetId(null).setProjectId(defaultProjectId); + } + // We split datasetExpr on the last occurrence of a project delimiter. To account for each + // delimiter appearance in the expression, we pass -1 as the limit value to disable discarding + // trailing empty strings. + String[] datasetComponents = datasetExpr.split(LAST_PROJECT_DELIMITER_REGEX, -1); + boolean isDatasetIdOnly = datasetComponents.length == 1; + String datasetId = isDatasetIdOnly ? datasetComponents[0] : datasetComponents[1]; + String datasetProjectId = + isDatasetIdOnly ? 
defaultProjectId : CatalogName.toProjectId(datasetComponents[0]); + return new DatasetReference().setDatasetId(datasetId).setProjectId(datasetProjectId); + } } diff --git a/src/main/java/net/starschema/clouddb/jdbc/BQPreparedStatement.java b/src/main/java/net/starschema/clouddb/jdbc/BQPreparedStatement.java index a191caa4..82e88b2a 100644 --- a/src/main/java/net/starschema/clouddb/jdbc/BQPreparedStatement.java +++ b/src/main/java/net/starschema/clouddb/jdbc/BQPreparedStatement.java @@ -236,6 +236,7 @@ public ResultSet executeQuery() throws SQLException { this.projectId, this.RunnableStatement, this.connection.getDataSet(), + this.connection.getDataSetProjectId(), this.connection.getUseLegacySql(), this.connection.getMaxBillingBytes()); this.logger.info("Executing Query: " + this.RunnableStatement); diff --git a/src/main/java/net/starschema/clouddb/jdbc/BQStatement.java b/src/main/java/net/starschema/clouddb/jdbc/BQStatement.java index 6310a993..6224c830 100644 --- a/src/main/java/net/starschema/clouddb/jdbc/BQStatement.java +++ b/src/main/java/net/starschema/clouddb/jdbc/BQStatement.java @@ -341,6 +341,7 @@ protected QueryResponse runSyncQuery(String querySql, boolean unlimitedBillingBy projectId, querySql, connection.getDataSet(), + connection.getDataSetProjectId(), this.connection.getUseLegacySql(), !unlimitedBillingBytes ? 
this.connection.getMaxBillingBytes() : null, getSyncTimeoutMillis(), // we need this to respond fast enough to avoid any diff --git a/src/main/java/net/starschema/clouddb/jdbc/BQStatementRoot.java b/src/main/java/net/starschema/clouddb/jdbc/BQStatementRoot.java index bf8d08d1..a83a0156 100644 --- a/src/main/java/net/starschema/clouddb/jdbc/BQStatementRoot.java +++ b/src/main/java/net/starschema/clouddb/jdbc/BQStatementRoot.java @@ -260,6 +260,7 @@ private int executeDML(String sql) throws SQLException { projectId, sql, connection.getDataSet(), + connection.getDataSetProjectId(), this.connection.getUseLegacySql(), this.connection.getMaxBillingBytes(), (long) querytimeout * 1000, @@ -323,6 +324,7 @@ public ResultSet executeQuery(String querySql, boolean unlimitedBillingBytes) projectId, querySql, connection.getDataSet(), + connection.getDataSetProjectId(), this.connection.getUseLegacySql(), billingBytes, (long) querytimeout * 1000, diff --git a/src/main/java/net/starschema/clouddb/jdbc/BQSupportFuncts.java b/src/main/java/net/starschema/clouddb/jdbc/BQSupportFuncts.java index c17d12b7..002c9592 100644 --- a/src/main/java/net/starschema/clouddb/jdbc/BQSupportFuncts.java +++ b/src/main/java/net/starschema/clouddb/jdbc/BQSupportFuncts.java @@ -68,11 +68,13 @@ public class BQSupportFuncts { */ public static String constructUrlFromPropertiesFile( Properties properties, boolean full, String dataset) throws UnsupportedEncodingException { - String projectId = properties.getProperty("projectid"); + String projectId = properties.getProperty("projectid"); // Represents the billing project. logger.debug("projectId is: " + projectId); String User = properties.getProperty("user"); String Password = properties.getProperty("password"); String path = properties.getProperty("path"); + // The dataset property value can optionally include a reference to a project id, which will be + // used in conjunction with the default dataset to handle unqualified table references. 
dataset = dataset == null ? properties.getProperty("dataset") : dataset; String forreturn = ""; @@ -621,10 +623,12 @@ public static Properties readFromPropFile(String filePath) throws IOException { * Run a query using the synchronous jobs.query() BigQuery endpoint. * * @param bigquery The BigQuery API wrapper - * @param projectId + * @param projectId The ProjectId to use for billing * @param querySql The SQL to execute * @param dataSet default dataset, can be null - * @param useLegacySql + * @param dataSetProjectId default dataset project id, only specified when the default dataset is + * non-null + * @param useLegacySql Use the legacy SQL dialect when true * @param maxBillingBytes Maximum bytes that the API will allow to bill * @param queryTimeoutMs The timeout at which point the API will return with an incomplete result * NOTE: this does _not_ mean the query fails, just we have to get the results async @@ -640,6 +644,7 @@ static QueryResponse runSyncQuery( String projectId, String querySql, String dataSet, + String dataSetProjectId, Boolean useLegacySql, Long maxBillingBytes, Long queryTimeoutMs, @@ -653,6 +658,7 @@ static QueryResponse runSyncQuery( projectId, querySql, dataSet, + dataSetProjectId, useLegacySql, maxBillingBytes, queryTimeoutMs, @@ -672,6 +678,7 @@ static Bigquery.Jobs.Query getSyncQuery( String projectId, String querySql, String dataSet, + String dataSetProjectId, Boolean useLegacySql, Long maxBillingBytes, Long queryTimeoutMs, @@ -692,7 +699,8 @@ static Bigquery.Jobs.Query getSyncQuery( qr = qr.setJobCreationMode(jobCreationMode.name()); } if (dataSet != null) { - qr.setDefaultDataset(new DatasetReference().setDatasetId(dataSet).setProjectId(projectId)); + qr.setDefaultDataset( + new DatasetReference().setDatasetId(dataSet).setProjectId(dataSetProjectId)); } if (maxResults != null) { qr.setMaxResults(maxResults); @@ -701,12 +709,44 @@ static Bigquery.Jobs.Query getSyncQuery( return bigquery.jobs().query(projectId, qr); } + /** + * Starts a new 
query in async mode. + * + *

This method exists to maintain backwards compatibility with prior bqjdbc releases. + * + * @param bigquery The bigquery instance, which is authorized + * @param projectId The project ID to use for both the billing and default dataset project ids + * @param querySql The SQL query which we want to run + * @param dataSet The default dataset, can be null + * @param useLegacySql Use the legacy SQL dialect when true + * @param maxBillingBytes Maximum bytes that the API will allow to bill + * @return The started Job, which we'll use to poll BigQuery for its state and then for its + * result data. + * @throws IOException + *

if the request for initializing or executing job fails + */ + public static Job startQuery( + Bigquery bigquery, + String projectId, + String querySql, + String dataSet, + Boolean useLegacySql, + Long maxBillingBytes) + throws IOException { + return startQuery( + bigquery, projectId, querySql, dataSet, projectId, useLegacySql, maxBillingBytes); + } + /** * Starts a new query in async mode. * * @param bigquery The bigquery instance, which is authorized - * @param projectId The project's ID + * @param projectId The project ID to use for billing * @param querySql The sql query which we want to run + * @param dataSet The default dataset, can be null + * @param dataSetProjectId The default dataset project id, only specified when the default dataset + * is non-null + * @param useLegacySql Use the legacy SQL dialect when true * @return A JobReference which we'll use to poll the bigquery, for its state, then for its mined * data. * @throws IOException @@ -717,6 +757,7 @@ public static Job startQuery( String projectId, String querySql, String dataSet, + String dataSetProjectId, Boolean useLegacySql, Long maxBillingBytes) throws IOException { @@ -732,7 +773,7 @@ public static Job startQuery( if (dataSet != null) queryConfig.setDefaultDataset( - new DatasetReference().setDatasetId(dataSet).setProjectId(projectId)); + new DatasetReference().setDatasetId(dataSet).setProjectId(dataSetProjectId)); job.setConfiguration(config); queryConfig.setQuery(querySql); diff --git a/src/test/java/net/starschema/clouddb/jdbc/JdbcUrlTest.java b/src/test/java/net/starschema/clouddb/jdbc/JdbcUrlTest.java index d9653505..5038fc21 100644 --- a/src/test/java/net/starschema/clouddb/jdbc/JdbcUrlTest.java +++ b/src/test/java/net/starschema/clouddb/jdbc/JdbcUrlTest.java @@ -4,13 +4,19 @@ import com.google.api.client.testing.http.MockLowLevelHttpRequest; import com.google.api.client.testing.http.MockLowLevelHttpResponse; import com.google.api.services.bigquery.Bigquery.Jobs.Query; +import 
com.google.api.services.bigquery.model.DatasetReference; import com.google.common.collect.ImmutableMap; +import java.io.File; +import java.io.FileOutputStream; import java.io.IOException; +import java.io.UnsupportedEncodingException; import java.nio.file.Files; import java.nio.file.Paths; import java.security.GeneralSecurityException; +import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; +import java.sql.Statement; import java.util.Map; import java.util.Properties; import junit.framework.Assert; @@ -36,16 +42,198 @@ public class JdbcUrlTest { public void setup() throws SQLException, IOException { properties = getProperties("/installedaccount.properties"); URL = getUrl("/installedaccount.properties", null) + "&useLegacySql=true"; - ; + this.bq = new BQConnection(URL, new Properties()); this.environmentVariables.set("GOOGLE_APPLICATION_CREDENTIALS", defaultServiceAccount); } + @Test + public void constructorForBQConnectionWorksWithoutProjectIdAndWithoutDataset() + throws SQLException { + // This test demonstrates that the constructor for BQConnection allows for an empty project id + // (see regex for pathParamsMatcher). + String url = + "jdbc:BQDriver:?" + + "withServiceAccount=true" + + "&user=697117590302-76cr6q3217nck6gks0kf4r151j4d9f8e@developer.gserviceaccount.com" + + "&password=src/test/resources/bigquery_credentials.p12" + + "&useLegacySql=true"; + + BQConnection connection = new BQConnection(url, new Properties()); + + Assert.assertEquals("", connection.getProjectId()); + } + + @Test + public void constructorForBQConnectionWorksWithEmptyDatasetId() throws SQLException { + // This test demonstrates that the constructor for BQConnection allows for an empty default + // dataset id (see regex for projectAndDatasetMatcher). + String url = + "jdbc:BQDriver:disco-parsec-659/?" 
+ + "withServiceAccount=true" + + "&user=697117590302-76cr6q3217nck6gks0kf4r151j4d9f8e@developer.gserviceaccount.com" + + "&password=src/test/resources/bigquery_credentials.p12" + + "&useLegacySql=true"; + + BQConnection connection = new BQConnection(url, new Properties()); + + Assert.assertEquals("disco-parsec-659", connection.getProjectId()); + Assert.assertEquals("disco-parsec-659", connection.getDataSetProjectId()); + Assert.assertEquals("", connection.getDataSet()); + } + + @Test + public void parseDatasetRefShouldWorkWithFullyQualifiedDataset() { + DatasetReference datasetRef1 = + BQConnection.parseDatasetRef("project1.dataset1", "billingProject"); + DatasetReference datasetRef2 = + BQConnection.parseDatasetRef("project2:dataset2", "billingProject"); + DatasetReference datasetRef3 = + BQConnection.parseDatasetRef("domain.com:project3:dataset3", "billingProject"); + DatasetReference datasetRef4 = + BQConnection.parseDatasetRef("domain.com:project4.dataset4", "billingProject"); + DatasetReference datasetRef5 = + BQConnection.parseDatasetRef("project5.dataset-5", "billingProject"); + DatasetReference datasetRef6 = + BQConnection.parseDatasetRef("project6:dataset-6", "billingProject"); + + Assert.assertEquals( + new DatasetReference().setProjectId("project1").setDatasetId("dataset1"), datasetRef1); + Assert.assertEquals( + new DatasetReference().setProjectId("project2").setDatasetId("dataset2"), datasetRef2); + Assert.assertEquals( + new DatasetReference().setProjectId("domain.com:project3").setDatasetId("dataset3"), + datasetRef3); + Assert.assertEquals( + new DatasetReference().setProjectId("domain.com:project4").setDatasetId("dataset4"), + datasetRef4); + Assert.assertEquals( + new DatasetReference().setProjectId("project5").setDatasetId("dataset-5"), datasetRef5); + Assert.assertEquals( + new DatasetReference().setProjectId("project6").setDatasetId("dataset-6"), datasetRef6); + } + + @Test + public void parseDatasetRefShouldWorkWithoutDatasetId() { + 
DatasetReference datasetRef1 = BQConnection.parseDatasetRef("project1.", "billingProject"); + DatasetReference datasetRef2 = BQConnection.parseDatasetRef("project2:", "billingProject"); + DatasetReference datasetRef3 = BQConnection.parseDatasetRef(":", "billingProject"); + DatasetReference datasetRef4 = BQConnection.parseDatasetRef(".", "billingProject"); + DatasetReference datasetRef5 = BQConnection.parseDatasetRef("", "billingProject"); + + Assert.assertEquals( + new DatasetReference().setProjectId("project1").setDatasetId(""), datasetRef1); + Assert.assertEquals( + new DatasetReference().setProjectId("project2").setDatasetId(""), datasetRef2); + Assert.assertEquals(new DatasetReference().setProjectId("").setDatasetId(""), datasetRef3); + Assert.assertEquals(new DatasetReference().setProjectId("").setDatasetId(""), datasetRef4); + Assert.assertEquals( + new DatasetReference().setProjectId("billingProject").setDatasetId(""), datasetRef5); + } + + @Test + public void parseDatasetRefShouldWorkWithoutDatasetProjectId() { + DatasetReference datasetRef1 = BQConnection.parseDatasetRef("dataset1", "billingProject1"); + DatasetReference datasetRef2 = BQConnection.parseDatasetRef("dataset2", ""); + DatasetReference datasetRef3 = BQConnection.parseDatasetRef("dataset3", null); + DatasetReference datasetRef4 = BQConnection.parseDatasetRef(".dataset4", "billingProject"); + DatasetReference datasetRef5 = BQConnection.parseDatasetRef(":dataset5", "billingProject"); + + Assert.assertEquals( + new DatasetReference().setProjectId("billingProject1").setDatasetId("dataset1"), + datasetRef1); + Assert.assertEquals( + new DatasetReference().setProjectId("").setDatasetId("dataset2"), datasetRef2); + Assert.assertEquals( + new DatasetReference().setProjectId(null).setDatasetId("dataset3"), datasetRef3); + Assert.assertEquals( + new DatasetReference().setProjectId("").setDatasetId("dataset4"), datasetRef4); + Assert.assertEquals( + new 
DatasetReference().setProjectId("").setDatasetId("dataset5"), datasetRef5); + } + + @Test + public void constructUrlFromPropertiesFileShouldWorkWhenProjectIdAndDatasetProjectIdAreSet() + throws IOException, SQLException { + properties.put("projectid", "disco-parsec-659"); + properties.put("dataset", "publicdata.samples"); + File tempFile = File.createTempFile("tmp_installedaccount", ".properties"); + tempFile.deleteOnExit(); + try (FileOutputStream outputStream = new FileOutputStream(tempFile)) { + properties.store(outputStream, "Test connection properties"); + } + Properties customProperties = BQSupportFuncts.readFromPropFile(tempFile.getAbsolutePath()); + String url = BQSupportFuncts.constructUrlFromPropertiesFile(customProperties, true, null); + + BQConnection connection = new BQConnection(url, new Properties()); + + Assert.assertEquals("disco-parsec-659", connection.getProjectId()); + Assert.assertEquals("publicdata", connection.getDataSetProjectId()); + Assert.assertEquals("samples", connection.getDataSet()); + } + + @Test + public void constructUrlFromPropertiesFileShouldWorkWhenOnlyProjectIdSet() + throws IOException, SQLException { + properties.put("projectid", "disco-parsec-659"); + properties.put("dataset", "looker_test"); + File tempFile = File.createTempFile("tmp_installedaccount", ".properties"); + tempFile.deleteOnExit(); + try (FileOutputStream outputStream = new FileOutputStream(tempFile)) { + properties.store(outputStream, "Test connection properties"); + } + Properties customProperties = BQSupportFuncts.readFromPropFile(tempFile.getAbsolutePath()); + String url = BQSupportFuncts.constructUrlFromPropertiesFile(customProperties, true, null); + + BQConnection connection = new BQConnection(url, new Properties()); + + Assert.assertEquals("disco-parsec-659", connection.getProjectId()); + Assert.assertEquals("disco-parsec-659", connection.getDataSetProjectId()); + Assert.assertEquals("looker_test", connection.getDataSet()); + } + @Test public void 
urlWithDefaultDatasetShouldWork() throws SQLException { Assert.assertEquals(properties.getProperty("dataset"), bq.getDataSet()); } + @Test + public void urlWithoutDatasetProjectIdShouldWorkAndGetDataSetProjectIdShouldReturnProjectId() { + Assert.assertEquals("disco-parsec-659", bq.getDataSetProjectId()); + } + + @Test + public void urlWithoutDatasetShouldWorkAndGetDataSetProjectIdShouldReturnProjectId() { + String urlWithoutDataset = URL.replace("/" + properties.getProperty("dataset"), ""); + + try { + BQConnection bqWithoutDataset = new BQConnection(urlWithoutDataset, new Properties()); + + Assert.assertEquals("disco-parsec-659", bqWithoutDataset.getDataSetProjectId()); + Assert.assertEquals(null, bqWithoutDataset.getDataSet()); + } catch (SQLException e) { + throw new AssertionError(e); + } + } + + @Test + public void urlWithDatasetProjectIdShouldWorkAndBeReturnedByGetDataSetProjectId() { + String urlWithDatasetProjectId = + URL.replace( + "/" + properties.getProperty("dataset"), + "/looker-test-db." + properties.getProperty("dataset")); + + try { + BQConnection bqWithDatasetProjectId = + new BQConnection(urlWithDatasetProjectId, new Properties()); + + Assert.assertEquals("disco-parsec-659", bqWithDatasetProjectId.getProjectId()); + Assert.assertEquals("looker-test-db", bqWithDatasetProjectId.getDataSetProjectId()); + } catch (SQLException e) { + throw new AssertionError(e); + } + } + @Test public void projectWithColons() throws SQLException { String urlWithColonContainingProject = URL.replace(bq.getProjectId(), "example.com:project"); @@ -72,6 +260,55 @@ public void mungedProjectName() throws SQLException { } } + @Test + public void mungedDatasetProjectName() throws SQLException { + String urlWithDatasetProjectIdContainingUnderscores = + URL.replace( + "/" + properties.getProperty("dataset"), + "/example_com__project." 
+ properties.getProperty("dataset")); + + try { + BQConnection bqWithUnderscores = + new BQConnection(urlWithDatasetProjectIdContainingUnderscores, new Properties()); + + Assert.assertEquals("example.com:project", bqWithUnderscores.getDataSetProjectId()); + } catch (SQLException e) { + throw new AssertionError(e); + } + } + + @Test + public void setSchemaWorksWhenDatasetProjectIdIsUnspecified() throws SQLException { + this.bq.setSchema("tokyo_star"); + + Assert.assertEquals("tokyo_star", this.bq.getDataSet()); + Assert.assertEquals("disco-parsec-659", this.bq.getDataSetProjectId()); + } + + @Test + public void setSchemaWorksWhenDatasetProjectIdIsSpecified() { + this.bq.setSchema("publicdata.samples"); + + Assert.assertEquals("samples", this.bq.getDataSet()); + Assert.assertEquals("publicdata", this.bq.getDataSetProjectId()); + } + + @Test + public void setSchemaWorksWhenInputIsEmpty() { + this.bq.setSchema(""); + + Assert.assertEquals("", this.bq.getDataSet()); + Assert.assertEquals("disco-parsec-659", this.bq.getDataSetProjectId()); + } + + @Test + public void setSchemaWorksWhenInputIsNull() { + this.bq.setSchema(null); + + Assert.assertEquals(null, this.bq.getDataSet()); + Assert.assertEquals("disco-parsec-659", this.bq.getDataSetProjectId()); + } + @Test public void urlWithTimeouts() throws SQLException { try { @@ -117,6 +354,268 @@ public void canRunQueryWithDefaultDataset() throws SQLException { stmt.executeQuery("SELECT * FROM orders limit 1"); } + @Test + public void canRunQueryOnStatementWithEmptyDatasetProjectId() + throws SQLException, UnsupportedEncodingException { + properties.put("dataset", ".looker_test"); + String url = + BQSupportFuncts.constructUrlFromPropertiesFile(properties, true, null) + + "&useLegacySql=true"; + BQConnection bqConnection = new BQConnection(url, new Properties()); + + Statement stmt = bqConnection.createStatement(); + + // Specifying an empty dataset project id results in the billing project id being used instead. 
+ Assert.assertEquals("", bqConnection.getDataSetProjectId()); + Assert.assertEquals("looker_test", bqConnection.getDataSet()); + stmt.executeQuery("SELECT * FROM orders limit 1"); + } + + @Test + public void canRunQueryOnPreparedStatementWithEmptyDatasetProjectId() + throws SQLException, UnsupportedEncodingException { + properties.put("dataset", ".looker_test"); + String url = + BQSupportFuncts.constructUrlFromPropertiesFile(properties, true, null) + + "&useLegacySql=true"; + BQConnection bqConnection = new BQConnection(url, new Properties()); + + PreparedStatement stmt = bqConnection.prepareStatement("SELECT * FROM orders limit 1"); + + // Specifying an empty dataset project id results in the billing project id being used instead. + Assert.assertEquals("", bqConnection.getDataSetProjectId()); + Assert.assertEquals("looker_test", bqConnection.getDataSet()); + stmt.executeQuery(); + } + + @Test + public void canNotRunQueryOnStatementWithoutDatasetProjectId() throws SQLException, IOException { + // The query should fail since the billing project 'disco-parsec-659' does not have the dataset + // 'samples'. 
+ properties.put("dataset", "samples"); + String url = + BQSupportFuncts.constructUrlFromPropertiesFile(properties, true, null) + + "&useLegacySql=true"; + BQConnection bqConnection = new BQConnection(url, new Properties()); + + Statement stmt = bqConnection.createStatement(); + + Assert.assertEquals("disco-parsec-659", bqConnection.getDataSetProjectId()); + Assert.assertEquals("samples", bqConnection.getDataSet()); + try { + stmt.executeQuery("SELECT * FROM shakespeare LIMIT 1"); + Assert.fail("Expected SQLException to be thrown without default dataset project id"); + } catch (SQLException e) { + Assert.assertTrue(e.getMessage().contains("disco-parsec-659:samples was not found in")); + } + } + + @Test + public void canNotRunQueryOnStatementWithEmptyDatasetProjectId() + throws SQLException, IOException { + // Specifying an empty string for the dataset project id should result in the billing project + // id being used instead. The query should fail since the billing project 'disco-parsec-659' + // does not have the dataset 'samples'. + properties.put("dataset", ".samples"); + String url = + BQSupportFuncts.constructUrlFromPropertiesFile(properties, true, null) + + "&useLegacySql=true"; + BQConnection bqConnection = new BQConnection(url, new Properties()); + + Statement stmt = bqConnection.createStatement(); + + Assert.assertEquals("", bqConnection.getDataSetProjectId()); + Assert.assertEquals("samples", bqConnection.getDataSet()); + try { + stmt.executeQuery("SELECT * FROM shakespeare LIMIT 1"); + Assert.fail("Expected SQLException to be thrown without default dataset project id"); + } catch (SQLException e) { + Assert.assertTrue(e.getMessage().contains("disco-parsec-659:samples was not found in")); + } + } + + @Test + public void canNotRunQueryOnStatementWithoutDataset() throws SQLException, IOException { + // The query should fail since there is no default dataset specified in the connection and the + // query has an unqualified table reference. 
+ properties.remove("dataset"); + String url = + BQSupportFuncts.constructUrlFromPropertiesFile(properties, true, null) + + "&useLegacySql=true"; + BQConnection bqConnection = new BQConnection(url, new Properties()); + + Statement stmt = bqConnection.createStatement(); + + Assert.assertEquals("disco-parsec-659", bqConnection.getDataSetProjectId()); + Assert.assertEquals(null, bqConnection.getDataSet()); + try { + stmt.executeQuery("SELECT * FROM shakespeare LIMIT 1"); + Assert.fail("Expected SQLException to be thrown without default dataset"); + } catch (SQLException e) { + Assert.assertTrue(e.getMessage().contains("must be qualified with a dataset")); + } + } + + @Test + public void canNotRunQueryOnStatementWithEmptyDatasetId() throws SQLException, IOException { + // The query should fail since the default dataset id is empty. + properties.put("dataset", "publicdata."); + String url = + BQSupportFuncts.constructUrlFromPropertiesFile(properties, true, null) + + "&useLegacySql=true"; + BQConnection bqConnection = new BQConnection(url, new Properties()); + + Statement stmt = bqConnection.createStatement(); + + Assert.assertEquals("publicdata", bqConnection.getDataSetProjectId()); + Assert.assertEquals("", bqConnection.getDataSet()); + try { + stmt.executeQuery("SELECT * FROM shakespeare LIMIT 1"); + Assert.fail("Expected SQLException to be thrown without default dataset id"); + } catch (SQLException e) { + Assert.assertTrue(e.getMessage().contains("parameter is missing: dataset_id")); + } + } + + @Test + public void canRunQueryOnStatementWithFullyQualifiedDataset() throws SQLException, IOException { + // Add the dataset project id that houses the 'samples' dataset. We do not have the ability to + // create BigQuery jobs in 'publicdata', but we can read from the 'samples' dataset in that + // project. If we fail to respect the dataset project id, then the query will fail, since the + // billing project 'disco-parsec-659' does not have that dataset. 
+ properties.put("dataset", "publicdata.samples"); + String url = + BQSupportFuncts.constructUrlFromPropertiesFile(properties, true, null) + + "&useLegacySql=true"; + BQConnection bqConnection = new BQConnection(url, new Properties()); + + Statement stmt = bqConnection.createStatement(); + + // This should succeed. We use the default dataset and project for the read query, but we charge + // the computation to 'disco-parsec-659'. + Assert.assertEquals("publicdata", bqConnection.getDataSetProjectId()); + Assert.assertEquals("samples", bqConnection.getDataSet()); + stmt.executeQuery("SELECT * FROM shakespeare LIMIT 1"); + } + + @Test + public void canNotRunQueryOnPreparedStatementWithoutDatasetProjectId() + throws SQLException, IOException { + // The query should fail since the billing project 'disco-parsec-659' does not have the dataset + // 'samples'. + properties.put("dataset", "samples"); + String url = + BQSupportFuncts.constructUrlFromPropertiesFile(properties, true, null) + + "&useLegacySql=true"; + BQConnection bqConnection = new BQConnection(url, new Properties()); + + PreparedStatement stmt = + bqConnection.prepareStatement("SELECT TOP(word, 3), COUNT(*) FROM shakespeare"); + + Assert.assertEquals("disco-parsec-659", bqConnection.getDataSetProjectId()); + Assert.assertEquals("samples", bqConnection.getDataSet()); + try { + stmt.executeQuery(); + Assert.fail("Expected SQLException to be thrown without default dataset project id"); + } catch (SQLException e) { + Assert.assertTrue(e.getMessage().contains("disco-parsec-659:samples was not found in")); + } + } + + @Test + public void canNotRunQueryOnPreparedStatementWithEmptyDatasetProjectId() + throws SQLException, IOException { + // Specifying an empty string for the dataset project id should result in the billing project + // id being used instead. The query should fail since the billing project 'disco-parsec-659' + // does not have the dataset 'samples'. 
+ properties.put("dataset", ".samples"); + String url = + BQSupportFuncts.constructUrlFromPropertiesFile(properties, true, null) + + "&useLegacySql=true"; + BQConnection bqConnection = new BQConnection(url, new Properties()); + + PreparedStatement stmt = + bqConnection.prepareStatement("SELECT TOP(word, 3), COUNT(*) FROM shakespeare"); + + Assert.assertEquals("", bqConnection.getDataSetProjectId()); + Assert.assertEquals("samples", bqConnection.getDataSet()); + try { + stmt.executeQuery(); + Assert.fail("Expected SQLException to be thrown without default dataset project id"); + } catch (SQLException e) { + Assert.assertTrue(e.getMessage().contains("disco-parsec-659:samples was not found in")); + } + } + + @Test + public void canNotRunQueryOnPreparedStatementWithoutDataset() throws SQLException, IOException { + // The query should fail since there is no default dataset specified in the connection and the + // query has an unqualified table reference. + properties.remove("dataset"); + String url = + BQSupportFuncts.constructUrlFromPropertiesFile(properties, true, null) + + "&useLegacySql=true"; + BQConnection bqConnection = new BQConnection(url, new Properties()); + + PreparedStatement stmt = + bqConnection.prepareStatement("SELECT TOP(word, 3), COUNT(*) FROM shakespeare"); + + Assert.assertEquals("disco-parsec-659", bqConnection.getDataSetProjectId()); + Assert.assertEquals(null, bqConnection.getDataSet()); + try { + stmt.executeQuery(); + Assert.fail("Expected SQLException to be thrown without default dataset"); + } catch (SQLException e) { + Assert.assertTrue(e.getMessage().contains("must be qualified with a dataset")); + } + } + + @Test + public void canNotRunQueryOnnPreparedStatementWithEmptyDatasetId() + throws SQLException, IOException { + // The query should fail since the default dataset id is empty. 
+ properties.put("dataset", "publicdata."); + String url = + BQSupportFuncts.constructUrlFromPropertiesFile(properties, true, null) + + "&useLegacySql=true"; + BQConnection bqConnection = new BQConnection(url, new Properties()); + + PreparedStatement stmt = + bqConnection.prepareStatement("SELECT TOP(word, 3), COUNT(*) FROM shakespeare"); + + Assert.assertEquals("publicdata", bqConnection.getDataSetProjectId()); + Assert.assertEquals("", bqConnection.getDataSet()); + try { + stmt.executeQuery(); + Assert.fail("Expected SQLException to be thrown without default dataset id"); + } catch (SQLException e) { + Assert.assertTrue(e.getMessage().contains("parameter is missing: dataset_id")); + } + } + + @Test + public void canRunQueryOnPreparedStatementWithFullyQualifiedDataset() + throws SQLException, IOException { + // Add the dataset project id that houses the 'samples' dataset. We do not have the ability to + // create BigQuery jobs in 'publicdata', but we can read from the 'samples' dataset in that + // project. If we fail to respect the dataset project id, then the query will fail, since the + // billing project 'disco-parsec-659' does not have that dataset. + properties.put("dataset", "publicdata.samples"); + String url = + BQSupportFuncts.constructUrlFromPropertiesFile(properties, true, null) + + "&useLegacySql=true"; + BQConnection bqConnection = new BQConnection(url, new Properties()); + + PreparedStatement stmt = + bqConnection.prepareStatement("SELECT TOP(word, 3), COUNT(*) FROM shakespeare"); + + // This should succeed. We use the default dataset and project for the read query, but we charge + // the computation to 'disco-parsec-659'. 
+ Assert.assertEquals("publicdata", bqConnection.getDataSetProjectId()); + Assert.assertEquals("samples", bqConnection.getDataSet()); + stmt.executeQuery(); + } + @Test public void canConnectWithPasswordProtectedP12File() throws SQLException, IOException { String url = getUrl("/protectedaccount.properties", null); @@ -208,6 +707,7 @@ public void oAuthAccessTokenOnlyInHeader() oauthProps.getProperty("projectid"), "SELECT * FROM orders limit 1", bqConn.getDataSet(), + bqConn.getDataSetProjectId(), bqConn.getUseLegacySql(), null, stmt.getSyncTimeoutMillis(), diff --git a/src/test/java/net/starschema/clouddb/jdbc/PreparedStatementTests.java b/src/test/java/net/starschema/clouddb/jdbc/PreparedStatementTests.java index b8fcb142..81930162 100644 --- a/src/test/java/net/starschema/clouddb/jdbc/PreparedStatementTests.java +++ b/src/test/java/net/starschema/clouddb/jdbc/PreparedStatementTests.java @@ -21,15 +21,18 @@ package net.starschema.clouddb.jdbc; import java.io.StringReader; +import java.io.UnsupportedEncodingException; import java.math.BigDecimal; import java.sql.Connection; import java.sql.DriverManager; import java.sql.PreparedStatement; import java.sql.ResultSetMetaData; import java.sql.SQLException; +import java.sql.Types; import java.text.DateFormat; import java.text.ParseException; import java.text.SimpleDateFormat; +import java.util.Properties; import org.junit.Assert; import org.junit.Before; import org.junit.Test; @@ -41,10 +44,12 @@ * @author Balazs Gunics */ public class PreparedStatementTests { - /** Static reference to the connection object */ static Connection con = null; + /** Properties used to create connection object */ + private Properties connectionProperties; + /** * Compares two String[][] * @@ -71,12 +76,14 @@ private boolean comparer(String[][] expected, String[][] reality) { @Before public void Connect() throws Exception { try { + connectionProperties = + BQSupportFuncts.readFromPropFile( + 
getClass().getResource("/installedaccount1.properties").getFile()); + String url = + BQSupportFuncts.constructUrlFromPropertiesFile(connectionProperties) + + "&useLegacySql=true"; Class.forName("net.starschema.clouddb.jdbc.BQDriver"); - PreparedStatementTests.con = - DriverManager.getConnection( - BQSupportFuncts.constructUrlFromPropertiesFile( - BQSupportFuncts.readFromPropFile("installedaccount1.properties")), - BQSupportFuncts.readFromPropFile("installedaccount1.properties")); + PreparedStatementTests.con = DriverManager.getConnection(url, connectionProperties); } catch (Exception e) { e.printStackTrace(); } @@ -146,11 +153,7 @@ public void ResultSetMetadataFunctionTestTypes() { {"SELECT CAST('2021-04-09T20:24:39' AS DATETIME)", "2021-04-09T20:24:39"}, {"SELECT CAST('1:23:45' AS TIME)", "01:23:45"}, {"SELECT CAST('test' AS BYTES)", "dGVzdA=="}, - {"SELECT CAST('123' as BIGNUMERIC)", "123"}, - { - "SELECT ST_GEOGFROMTEXT('LINESTRING(6.2312655 51.9967517, 6.2312606 51.9968043)')", - "LINESTRING(6.2312655 51.9967517, 6.2312606 51.9968043)" - } + {"SELECT CAST('123' as BIGNUMERIC)", "123"} }; final int[] expectedType = @@ -165,8 +168,7 @@ public void ResultSetMetadataFunctionTestTypes() { java.sql.Types.TIMESTAMP, java.sql.Types.TIME, java.sql.Types.VARCHAR, - java.sql.Types.NUMERIC, - java.sql.Types.VARCHAR + java.sql.Types.NUMERIC }; for (int i = 0; i < queries.length; i++) { @@ -203,6 +205,33 @@ public void ResultSetMetadataFunctionTestTypes() { con = null; } + @Test + public void ResultSetMetadataFunctionTestTypesOnQueryThatRequiresGoogleSQL() + throws SQLException, UnsupportedEncodingException { + String query = + "SELECT ST_GEOGFROMTEXT('LINESTRING(6.2312655 51.9967517, 6.2312606 51.9968043)')"; + String url = + BQSupportFuncts.constructUrlFromPropertiesFile(connectionProperties) + + "&useLegacySql=false"; + int expectedTye = Types.VARCHAR; + String expectedResult = "LINESTRING(6.2312655 51.9967517, 6.2312606 51.9968043)"; + Connection connection = 
DriverManager.getConnection(url, connectionProperties); + PreparedStatement stm = connection.prepareStatement(query); + + java.sql.ResultSet result = stm.executeQuery(); + + Assert.assertNotNull(result); + Assert.assertEquals( + "Expected type was not returned in metadata", + expectedTye, + result.getMetaData().getColumnType(1)); + while (result.next()) { + Assert.assertNotNull(result.getObject(1)); + Assert.assertEquals(expectedResult, result.getString(1)); + } + connection.close(); + } + /** setBigDecimal test */ @Test public void setBigDecimalTest() {