diff --git a/clients/spark/core/src/main/scala/io/treeverse/clients/GarbageCollector.scala b/clients/spark/core/src/main/scala/io/treeverse/clients/GarbageCollector.scala index e11f2165a86..149cd3a1ee0 100644 --- a/clients/spark/core/src/main/scala/io/treeverse/clients/GarbageCollector.scala +++ b/clients/spark/core/src/main/scala/io/treeverse/clients/GarbageCollector.scala @@ -356,14 +356,17 @@ object GarbageCollector { println("Expired addresses:") expiredAddresses.show() - val storageNamespace = new ApiClient(apiURL, accessKey, secretKey).getStorageNamespace(repo) + var storageNamespace = new ApiClient(apiURL, accessKey, secretKey).getStorageNamespace(repo) + if (!storageNamespace.endsWith("/")) { + storageNamespace += "/" + } val removed = remove(storageNamespace, gcAddressesLocation, expiredAddresses, runID, region, hcValues) val commitsDF = getCommitsDF(runID, gcCommitsLocation, spark) - val reportLogsDst = s"${storageNamespace}/_lakefs/logs/gc/summary/" - val reportExpiredDst = s"${storageNamespace}/_lakefs/logs/gc/expired_addresses/" + val reportLogsDst = concatToGCLogsPrefix(storageNamespace, "summary") + val reportExpiredDst = concatToGCLogsPrefix(storageNamespace, "expired_addresses") val time = DateTimeFormatter.ISO_INSTANT.format(java.time.Clock.systemUTC.instant()) writeParquetReport(commitsDF, reportLogsDst, time, "commits.parquet") @@ -375,11 +378,16 @@ object GarbageCollector { .write .partitionBy("run_id") .mode(SaveMode.Overwrite) - .parquet(s"${storageNamespace}/_lakefs/logs/gc/deleted_objects/${time}/deleted.parquet") + .parquet(concatToGCLogsPrefix(storageNamespace, s"deleted_objects/$time/deleted.parquet")) spark.close() } + private def concatToGCLogsPrefix(storageNameSpace: String, key: String): String = { + val strippedKey = key.stripPrefix("/") + s"${storageNameSpace}_lakefs/logs/gc/$strippedKey" + } + private def repartitionBySize(df: DataFrame, maxSize: Int, column: String): DataFrame = { val nRows = df.count() val nPartitions = math.max(1, math.ceil(nRows / maxSize)).toInt diff --git a/pkg/graveler/retention/garbage_collection_manager.go b/pkg/graveler/retention/garbage_collection_manager.go index dd14c6544c6..60a03ffadf1 100644 --- a/pkg/graveler/retention/garbage_collection_manager.go +++ b/pkg/graveler/retention/garbage_collection_manager.go @@ -19,9 +19,9 @@ import ( ) const ( - configFileSuffixTemplate = "/%s/retention/gc/rules/config.json" - addressesFilePrefixTemplate = "/%s/retention/gc/addresses/" - commitsFileSuffixTemplate = "/%s/retention/gc/commits/run_id=%s/commits.csv" + configFileSuffixTemplate = "%s/retention/gc/rules/config.json" + addressesFilePrefixTemplate = "%s/retention/gc/addresses/" + commitsFileSuffixTemplate = "%s/retention/gc/commits/run_id=%s/commits.csv" ) type GarbageCollectionManager struct {