Skip to content

Commit

Permalink
Garbage Collector: Eliminate double slash in URL (#3525)
Browse files Browse the repository at this point in the history
* remove the slash from the beginning of the path and make sure the storage namespace ends with a slash

* eliminate the leading slash in the config, addresses, and commits path templates, as it was erroneous

* extract the location string building to a function
  • Loading branch information
Jonathan-Rosenberg authored Jun 21, 2022
1 parent a21c0fc commit b4e9ac4
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -356,14 +356,17 @@ object GarbageCollector {
println("Expired addresses:")
expiredAddresses.show()

val storageNamespace = new ApiClient(apiURL, accessKey, secretKey).getStorageNamespace(repo)
var storageNamespace = new ApiClient(apiURL, accessKey, secretKey).getStorageNamespace(repo)
if (!storageNamespace.endsWith("/")) {
storageNamespace += "/"
}

val removed =
remove(storageNamespace, gcAddressesLocation, expiredAddresses, runID, region, hcValues)

val commitsDF = getCommitsDF(runID, gcCommitsLocation, spark)
val reportLogsDst = s"${storageNamespace}/_lakefs/logs/gc/summary/"
val reportExpiredDst = s"${storageNamespace}/_lakefs/logs/gc/expired_addresses/"
val reportLogsDst = concatToGCLogsPrefix(storageNamespace, "summary")
val reportExpiredDst = concatToGCLogsPrefix(storageNamespace, "expired_addresses")

val time = DateTimeFormatter.ISO_INSTANT.format(java.time.Clock.systemUTC.instant())
writeParquetReport(commitsDF, reportLogsDst, time, "commits.parquet")
Expand All @@ -375,11 +378,16 @@ object GarbageCollector {
.write
.partitionBy("run_id")
.mode(SaveMode.Overwrite)
.parquet(s"${storageNamespace}/_lakefs/logs/gc/deleted_objects/${time}/deleted.parquet")
.parquet(concatToGCLogsPrefix(storageNamespace, s"deleted_objects/$time/deleted.parquet"))

spark.close()
}

/** Builds a location under the GC logs prefix (`_lakefs/logs/gc/`) of a storage namespace.
 *
 *  Exactly one `/` is placed between the namespace and the prefix: a non-empty namespace
 *  that does not already end with `/` gets one appended, so the result does not depend on
 *  the caller having normalized it beforehand. A single leading `/` on `key` is dropped to
 *  avoid a double slash after the prefix.
 *
 *  @param storageNameSpace repository storage namespace, with or without a trailing slash
 *  @param key path relative to the GC logs prefix, with or without one leading slash
 *  @return `storageNameSpace` + `/` (if needed) + `_lakefs/logs/gc/` + `key`
 */
private def concatToGCLogsPrefix(storageNameSpace: String, key: String): String = {
  // Keep an empty namespace empty so a relative result is not turned into an absolute one.
  val normalizedNamespace =
    if (storageNameSpace.isEmpty || storageNameSpace.endsWith("/")) storageNameSpace
    else s"$storageNameSpace/"
  val strippedKey = key.stripPrefix("/")
  s"${normalizedNamespace}_lakefs/logs/gc/$strippedKey"
}

private def repartitionBySize(df: DataFrame, maxSize: Int, column: String): DataFrame = {
val nRows = df.count()
val nPartitions = math.max(1, math.ceil(nRows / maxSize)).toInt
Expand Down
6 changes: 3 additions & 3 deletions pkg/graveler/retention/garbage_collection_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@ import (
)

const (
configFileSuffixTemplate = "/%s/retention/gc/rules/config.json"
addressesFilePrefixTemplate = "/%s/retention/gc/addresses/"
commitsFileSuffixTemplate = "/%s/retention/gc/commits/run_id=%s/commits.csv"
configFileSuffixTemplate = "%s/retention/gc/rules/config.json"
addressesFilePrefixTemplate = "%s/retention/gc/addresses/"
commitsFileSuffixTemplate = "%s/retention/gc/commits/run_id=%s/commits.csv"
)

type GarbageCollectionManager struct {
Expand Down

0 comments on commit b4e9ac4

Please sign in to comment.