Skip to content

Commit

Permalink
fix: ⚡ add test and configuration for existing file uploads
Browse files Browse the repository at this point in the history
  • Loading branch information
Ignazio Bovo committed Aug 28, 2024
1 parent 6e1f6e0 commit ebd0f03
Show file tree
Hide file tree
Showing 5 changed files with 173 additions and 26 deletions.
2 changes: 1 addition & 1 deletion .env
Original file line number Diff line number Diff line change
Expand Up @@ -200,4 +200,4 @@ CLOUD_STORAGE_PROVIDER_NAME=aws
AWS_ACCESS_KEY_ID=test
AWS_SECRET_ACCESS_KEY=test
AWS_REGION=eu-west-1
AWS_BUCKET_NAME=sample-bucket
AWS_BUCKET_NAME=test-bucket
52 changes: 35 additions & 17 deletions storage-node/src/services/storageProviders/IConnectionHandler.ts
Original file line number Diff line number Diff line change
@@ -1,35 +1,53 @@
export type UploadFileOutput = {
key: string
filePath: string
}

export type UploadFileIfNotExistsOutput = {
key: string
filePath: string
alreadyExists: boolean
}
/**
* Represents an abstract connection handler for a storage provider.
*/
/**
* Represents a connection handler for interacting with a remote storage bucket.
* Represents a connection handler for interacting with a remote storage unit.
* The storage unit can be a bucket in S3, a container in Azure Blob Storage, or similar concepts in other cloud storage services.
* Within this storage unit, objects are organized using keys. A key is a string that defines the location of an object
* within the storage unit. Keys use the format "<directory>/<filename>" with "/" as a delimiter to separate directories.
*/
export interface IConnectionHandler {
/**
* Asynchronously uploads a file to the remote bucket.
* @param filename - The key of the file in the remote bucket.
* @param filePath - The file path of the file to upload.
* Asynchronously uploads an object to the storage unit. It doesn't check if the object already exists.
* @param key - The key of the object in the storage unit.
* @param filePath - The local file path of the object to upload.
* @returns A promise that resolves when the upload is complete or rejects with an error.
*/
uploadFileToRemoteBucket(key: string, filePath: string): Promise<UploadFileOutput>

/**
* Asynchronously uploads an object to the storage unit if it does not exist.
* @param key - The key of the object in the storage unit.
* @param filePath - The local file path of the object to upload.
* @returns A promise that resolves when the upload is complete or rejects with an error.
*/
uploadFileToRemoteBucket(filename: string, filePath: string): Promise<any>
uploadFileToRemoteBucketIfNotExists(key: string, filePath: string): Promise<UploadFileIfNotExistsOutput>

/**
* Asynchronously retrieves a file from the remote bucket.
* @param filename - The key of the file to retrieve from the remote bucket.
* @returns A promise that resolves in the presigned URL of the file or rejects with an error, 1h expiry
* Asynchronously retrieves a presigned URL for an object in the storage unit.
* @param key - The key of the object in the storage unit.
* @returns A promise that resolves with the presigned URL of the object (1h expiry) or rejects with an error.
*/
getRedirectUrlForObject(filename: string): Promise<string>
getRedirectUrlForObject(key: string): Promise<string>

/**
* Asynchronously lists ALL files in the remote bucket, to be used during cache initialization only as it can be very slow.
* @returns A promise that resolves with an array of file keys or rejects with an error.
* Asynchronously lists ALL objects in the storage unit. To be used during cache initialization only as it can be very slow.
* @returns A promise that resolves with an array of object keys or rejects with an error.
*/
listFilesOnRemoteBucket(): Promise<string[]>

/**
* Asynchronously removes a file from the remote bucket.
* @param filename - The key of the file to remove from the remote bucket.
* Asynchronously removes an object from the storage unit.
* @param key - The key of the object to remove from the storage unit.
* @returns A promise that resolves when the removal is complete or rejects with an error.
*/
removeObject(filename: string): Promise<void>
removeObject(key: string): Promise<void>
}
85 changes: 78 additions & 7 deletions storage-node/src/services/storageProviders/awsConnectionHandler.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import { IConnectionHandler } from './IConnectionHandler'
import { IConnectionHandler, UploadFileIfNotExistsOutput, UploadFileOutput } from './IConnectionHandler'
import {
CreateMultipartUploadCommand,
DeleteObjectCommand,
GetObjectCommand,
HeadObjectCommand,
ListObjectsCommand,
ListObjectsCommandInput,
PutObjectCommand,
PutObjectCommandInput,
S3Client,
} from '@aws-sdk/client-s3'
import { getSignedUrl } from '@aws-sdk/s3-request-presigner'
Expand Down Expand Up @@ -66,27 +68,75 @@ export class AwsConnectionHandler implements IConnectionHandler {
// Response status code info: https://docs.aws.amazon.com/AmazonS3/latest/API/ErrorResponses.html
return response.$metadata.httpStatusCode === 200
}

private fileNotFoundResponse(response: any): boolean {
return response.$metadata.httpStatusCode === 404
}

private insufficientPermissionsResponse(response: any): boolean {
return response.$metadata.httpStatusCode === 403
}

private isMultiPartNeeded(filePath: string): boolean {
const stats = fs.statSync(filePath)
const fileSizeInBytes = stats.size
const fileSizeInGigabytes = fileSizeInBytes / (1024 * 1024 * 1024)
return fileSizeInGigabytes > this.multiPartThresholdGB
}

async uploadFileToRemoteBucket(filename: string, filePath: string): Promise<any> {
const input = {
async uploadFileToRemoteBucket(key: string, filePath: string): Promise<UploadFileOutput> {
try {
await this.uploadFileToAWSBucket(key, filePath)
return {
key,
filePath,
}
} catch (error) {
throw error
}
}

async uploadFileToRemoteBucketIfNotExists(key: string, filePath: string): Promise<UploadFileIfNotExistsOutput> {
// check if file exists at key
const fileExists = await this.checkIfFileExists(key)
// if it does, return
if (fileExists) {
return {
key,
filePath,
alreadyExists: true,
}
}
// if it doesn't, upload the file
try {
await this.uploadFileToAWSBucket(key, filePath)
return {
key,
filePath,
alreadyExists: false,
}
} catch (error) {
throw error
}
}

private async uploadFileToAWSBucket(filename: string, filePath: string): Promise<any> {
const fileStream = fs.createReadStream(filePath)

const input: PutObjectCommandInput = {
Bucket: this.bucket,
Key: filename,
Body: filePath,
Body: fileStream,
}

// Uploading files to the bucket: multipart
const command = this.isMultiPartNeeded(filePath)
? new CreateMultipartUploadCommand(input)
: new PutObjectCommand(input)
const response = await this.client.send(command)
if (!this.isSuccessfulResponse(response)) {
throw new Error('Failed to upload file to S3')
try {
return await this.client.send(command)
} catch (error) {
throw error
}
}

Expand Down Expand Up @@ -136,4 +186,25 @@ export class AwsConnectionHandler implements IConnectionHandler {

await this.client.send(new DeleteObjectCommand(input))
}

private async checkIfFileExists(filename: string): Promise<boolean> {
const input = {
Bucket: this.bucket,
Key: filename,
}

const command = new HeadObjectCommand(input)
const response = await this.client.send(command)
if (this.isSuccessfulResponse(response)) {
return true
} else if (this.fileNotFoundResponse(response)) {
return false
} else if (this.insufficientPermissionsResponse(response)) {
throw new Error('Insufficient permissions to check if file exists in S3 bucket')
} else {
throw new Error(
`Unknown error when checking if file exists in S3 bucket: error ${response.$metadata.httpStatusCode}`
)
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import { GetObjectCommand, S3Client } from '@aws-sdk/client-s3'
import { AwsConnectionHandler } from '../../awsConnectionHandler'
import { readFileSync } from 'fs'
import { expect } from '@jest/globals'
/**
* @group integration
* Test 1
* Arrange : localstack running with S3 bucket 'test-bucket'
* Act: Upload file using AwsConnectionHandler './testfile.txt' to the bucket in 'testDir/testfile.txt'
* Assert: using aws api download the file and compare hash for equality
* Test 2
* Arrange : localstack running with S3 bucket 'test-bucket'
* Act: Upload file './testfile.txt' to the bucket in 'testDir/testfile.txt'
* Assert: using aws api download the file and compare hash for equality
*/

const awsOptions = {
accessKeyId: 'test',
secretAccessKey: 'test',
region: 'us-east-1',
bucketName: 'test-bucket',
}
describe('AwsConnectionHandler full test suite', () => {
const s3Client = new S3Client({
region: awsOptions.region,
credentials: {
accessKeyId: awsOptions.accessKeyId,
secretAccessKey: awsOptions.secretAccessKey,
},
forcePathStyle: true,
tls: false,
endpoint: 'http://localhost:4566',
})
const awsConnectionHandler = new AwsConnectionHandler(awsOptions)

it('upload file content correctly', async () => {
const filePath = './testfile.txt'
const bucketPath = 'testDir/testfile.txt'

await awsConnectionHandler.uploadFileToRemoteBucket(filePath, bucketPath)

const { Body } = await s3Client.send(
new GetObjectCommand({
Bucket: awsOptions.bucketName,
Key: bucketPath,
})
)
const expectedFileContent = readFileSync(filePath)
const fileContent = await Body?.transformToByteArray()
expect(fileContent).toEqual(expectedFileContent)
})
it('no op upload if file already exists', async () => {
const filePath = './testfile.txt'
const bucketPath = 'testDir/testfile.txt'

await awsConnectionHandler.uploadFileToRemoteBucket(filePath, bucketPath)
})
})
2 changes: 1 addition & 1 deletion storage-node/src/services/storageProviders/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ dotenv.config()
* @returns A promise that resolves to an instance of IConnectionHandler if the provider is enabled, otherwise undefined.
* @throws An error if the storage cloud provider type is invalid or unsupported.
*/
export async function parseConfigOptionAndBuildConnection(): Promise<IConnectionHandler | undefined> {
export function parseConfigOptionAndBuildConnection(): IConnectionHandler | undefined {
const enableProvider = process.env.ENABLE_STORAGE_PROVIDER === 'true'
if (!enableProvider) {
return undefined
Expand Down

0 comments on commit ebd0f03

Please sign in to comment.