Skip to content

Commit

Permalink
Fix GCS path parsing (#181)
Browse files Browse the repository at this point in the history
For a path such as `gs://delta_sharing_test/foo`, `getHost` returns null and triggers an NPE. This PR changes the code to parse the path with the GCS APIs instead, which avoids the issue (they call `getAuthority` under the hood).

This is not an issue for S3 or Azure because they require the bucket name to be a valid host name (which cannot contain `_`).
  • Loading branch information
zsxwing authored Aug 26, 2022
1 parent a9b6aee commit 8a2c7c4
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import java.util.concurrent.TimeUnit.SECONDS

import com.amazonaws.HttpMethod
import com.amazonaws.services.s3.model.GeneratePresignedUrlRequest
import com.google.cloud.hadoop.gcsio.StorageResourceId
import com.google.cloud.storage.BlobId
import com.google.cloud.storage.BlobInfo
import com.google.cloud.storage.Storage
Expand Down Expand Up @@ -202,13 +203,18 @@ class GCSFileSigner(
// Storage service handle obtained from a `StorageOptions` builder with no explicit settings;
// `sign` calls `signUrl` on it to produce pre-signed URLs.
private val storage = StorageOptions.newBuilder.build.getService

/**
 * Creates a V4-signed URL for the object that `path` points to.
 *
 * The bucket and object names are obtained from
 * `GCSFileSigner.getBucketAndObjectNames` rather than from `URI.getHost`:
 * `getHost` returns null for bucket names that are not valid host names
 * (for example names containing `_`), which previously led to an NPE.
 *
 * @param path location of the object to sign
 * @return the signed URL rendered as a string; the URL is requested with a
 *         lifetime of `preSignedUrlTimeoutSeconds` seconds
 */
override def sign(path: Path): String = {
  val (bucketName, objectName) = GCSFileSigner.getBucketAndObjectNames(path)
  assert(objectName.nonEmpty, s"cannot get object key from $path")
  val blobInfo = BlobInfo.newBuilder(BlobId.of(bucketName, objectName)).build
  storage.signUrl(
    blobInfo, preSignedUrlTimeoutSeconds, SECONDS, Storage.SignUrlOption.withV4Signature())
    .toString
}
}

object GCSFileSigner {
  /**
   * Splits a storage path into its bucket name and object name.
   *
   * Parsing is delegated to `StorageResourceId.fromUriPath`, invoked with
   * `allowEmptyObjectName = false`.
   *
   * @param path the path to decompose
   * @return a `(bucketName, objectName)` pair
   */
  def getBucketAndObjectNames(path: Path): (String, String) = {
    val uri = path.toUri
    // The second argument is `allowEmptyObjectName`.
    val parsed = StorageResourceId.fromUriPath(uri, false)
    val bucket = parsed.getBucketName
    val objectKey = parsed.getObjectName
    (bucket, objectKey)
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/*
* Copyright (2021) The Delta Lake Project Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.delta.sharing.server

import org.apache.hadoop.fs.Path
import org.scalatest.FunSuite

/** Unit tests for the path-parsing helper exposed by `GCSFileSigner`. */
class CloudFileSignerSuite extends FunSuite {

  test("GCSFileSigner.getBucketAndObjectNames") {
    // Each entry pairs an input URI with the expected (bucket, object) result.
    // The second bucket name contains underscores.
    val cases = Seq(
      ("gs://delta-sharing-test/foo", ("delta-sharing-test", "foo")),
      ("gs://delta_sharing_test/foo", ("delta_sharing_test", "foo")))

    cases.foreach { case (uri, expected) =>
      assert(GCSFileSigner.getBucketAndObjectNames(new Path(uri)) == expected)
    }
  }
}

0 comments on commit 8a2c7c4

Please sign in to comment.