-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit b799770
Showing
30 changed files
with
2,997 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
artifacts | ||
bin | ||
obj | ||
_Resharper* | ||
*.user | ||
*.build.csdef | ||
*.suo | ||
.vs/ | ||
csx | ||
Thumbs.db | ||
test/azure_connection.txt | ||
test/azure_dual_connection.txt |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
Copyright (c) 2020, LOKAD SAS | ||
All rights reserved. | ||
|
||
Redistribution and use in source and binary forms, with or without modification, | ||
are permitted provided that the following conditions are met: | ||
|
||
* Redistributions of source code must retain the above copyright notice, this | ||
list of conditions and the following disclaimer. | ||
|
||
* Redistributions in binary form must reproduce the above copyright notice, this | ||
list of conditions and the following disclaimer in the documentation and/or | ||
other materials provided with the distribution. | ||
|
||
* Neither the name of LOKAD SAS nor the names of its contributors may be used to | ||
endorse or promote products derived from this software without specific prior | ||
written permission. | ||
|
||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | ||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | ||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR | ||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | ||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | ||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON | ||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
|
||
Microsoft Visual Studio Solution File, Format Version 12.00 | ||
# Visual Studio Version 16 | ||
VisualStudioVersion = 16.0.29411.108 | ||
MinimumVisualStudioVersion = 10.0.40219.1 | ||
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Lokad.ContentAddr.Azure", "src\Lokad.ContentAddr.Azure.csproj", "{AC03061D-6376-4918-89AE-7B1D6661AC94}" | ||
EndProject | ||
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Lokad.ContentAddr.Azure.Tests", "test\Lokad.ContentAddr.Azure.Tests.csproj", "{D348400F-7835-488E-A95E-79D1CB545327}" | ||
EndProject | ||
Global | ||
GlobalSection(SolutionConfigurationPlatforms) = preSolution | ||
Debug|Any CPU = Debug|Any CPU | ||
Release|Any CPU = Release|Any CPU | ||
EndGlobalSection | ||
GlobalSection(ProjectConfigurationPlatforms) = postSolution | ||
{AC03061D-6376-4918-89AE-7B1D6661AC94}.Debug|Any CPU.ActiveCfg = Debug|Any CPU | ||
{AC03061D-6376-4918-89AE-7B1D6661AC94}.Debug|Any CPU.Build.0 = Debug|Any CPU | ||
{AC03061D-6376-4918-89AE-7B1D6661AC94}.Release|Any CPU.ActiveCfg = Release|Any CPU | ||
{AC03061D-6376-4918-89AE-7B1D6661AC94}.Release|Any CPU.Build.0 = Release|Any CPU | ||
{D348400F-7835-488E-A95E-79D1CB545327}.Debug|Any CPU.ActiveCfg = Debug|Any CPU | ||
{D348400F-7835-488E-A95E-79D1CB545327}.Debug|Any CPU.Build.0 = Debug|Any CPU | ||
{D348400F-7835-488E-A95E-79D1CB545327}.Release|Any CPU.ActiveCfg = Release|Any CPU | ||
{D348400F-7835-488E-A95E-79D1CB545327}.Release|Any CPU.Build.0 = Release|Any CPU | ||
EndGlobalSection | ||
GlobalSection(SolutionProperties) = preSolution | ||
HideSolutionNode = FALSE | ||
EndGlobalSection | ||
GlobalSection(ExtensibilityGlobals) = postSolution | ||
SolutionGuid = {48447358-1B7D-47DD-98CB-1E8D476C31A4} | ||
EndGlobalSection | ||
EndGlobal |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
Azure Blob Storage support for [Lokad.ContentAddr](https://github.com/Lokad/ContentAddr). | ||
|
||
The relevant types are `AzureStoreFactory` (for a multi-tenant store) and `AzureStore` (for | ||
a single-tenant store). These implement the `IStoreFactory` and `IStore` interfaces from | ||
the base Lokad.ContentAddr library. | ||
|
||
## Data Layout | ||
|
||
This creates two containers in the Azure Blob Storage account: | ||
|
||
- `persist` will contain the persistent blobs, named `<realm>/<hash>` (for instance, a | ||
blob `AC03061D6376491889AE7B1D6661AC94` in realm `85055` would be named | ||
`85055/AC03061D6376491889AE7B1D6661AC94`). | ||
|
||
- `staging` will contain temporary blobs as they are being uploaded to | ||
the content-addressable store. This container can be safely emptied without | ||
losing any committed data. | ||
|
||
This library supports two models for uploading blobs:
|
||
### Uploading with the Lokad.ContentAddr library | ||
|
||
This uses the `IStore` interface to push data to Azure Blob Storage. | ||
For example: | ||
|
||
```c# | ||
AzureStore store = ...; | ||
store.WriteAsync(new byte[] {...}, cancel); | ||
``` | ||
|
||
More information about the implementation details (not relevant to normal use | ||
of the library) follows: | ||
|
||
Data is accumulated in-memory up to a certain point (around 4MB), after which | ||
a block blob is created in the `staging` container, and the data is written | ||
as separate blocks to that blob, to avoid consuming too much memory. | ||
|
||
The hash of the blob is calculated on-the-fly. | ||
|
||
Once the entire data has been provided, the hash becomes known. The library then
determines whether a blob with the same hash already exists for the tenant. If it
does, the temporary block blob is deleted. If it does not, the temporary block blob
is committed, then copied to the `persist` container with the appropriate hash.
|
||
### Uploading from a different source | ||
|
||
It is also possible to give a different client an upload link with a shared access | ||
signature (for instance, an in-browser JavaScript uploader). This is done | ||
using the appropriate method on the store: | ||
|
||
```c# | ||
AzureStore store = ...; | ||
string identifier = Guid.NewGuid().ToString(); | ||
TimeSpan life = TimeSpan.FromMinutes(10); | ||
Url url = store.GetSignedUploadUrl(identifier, life); | ||
``` | ||
|
||
This will create a Shared Access Signature, valid for 10 minutes, that allows the
client to write to that specific blob in the `staging` container.
|
||
Once the client has finished uploading, it should contact the issuer of the signed | ||
URL in order to commit the upload to the store. The issuer then invokes: | ||
|
||
```c# | ||
// Same store and identifier as were used to generate the URL | ||
AzureStore store = ...; | ||
string identifier = ...; | ||
await store.CommitTemporaryBlob(identifier, cancellationToken); | ||
``` | ||
|
||
This causes the store to download the entire uploaded blob, compute its hash, | ||
and then move it to the `persist` container (if a copy is not already present | ||
there). | ||
|
||
## Download links | ||
|
||
It is possible to produce a short-lived URL that allows anyone to download a | ||
blob from the `persist` container. | ||
|
||
```c# | ||
AzureStore store; | ||
IAzureBlobRef blob = store[new Hash("AC03061D6376491889AE7B1D6661AC94")]; | ||
Uri download = await blob.GetDownloadUrlAsync(
now: DateTime.UtcNow, | ||
life: TimeSpan.FromMinutes(10), | ||
filename: "budget.xlsx", | ||
contentType: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", | ||
cancel: cancellationToken); | ||
``` | ||
|
||
When the blob is downloaded through this URL, Azure returns it with a
`Content-Disposition: attachment` header, which causes browsers to save it as a
file (with the file name specified through the `filename` argument of the function).
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,196 @@ | ||
using Microsoft.WindowsAzure.Storage; | ||
using Microsoft.WindowsAzure.Storage.Blob; | ||
using System; | ||
using System.IO; | ||
using System.Linq; | ||
using System.Text; | ||
using System.Text.RegularExpressions; | ||
using System.Threading; | ||
using System.Threading.Tasks; | ||
|
||
namespace Lokad.ContentAddr.Azure | ||
{ | ||
/// <summary> The <see cref="IReadBlobRef"/> for an azure persistent store. </summary> | ||
/// <see cref="AzureReadOnlyStore"/> | ||
/// <remarks> | ||
/// Exposes the Azure Storage blob itself in <see cref="Blob"/>. | ||
/// </remarks> | ||
public sealed class AzureBlobRef : IAzureReadBlobRef | ||
{ | ||
public AzureBlobRef(string realm, Hash hash, CloudBlockBlob blob) | ||
{ | ||
Hash = hash; | ||
Realm = realm; | ||
Blob = blob; | ||
} | ||
|
||
/// <see cref="IReadBlobRef.Hash"/> | ||
public Hash Hash { get; } | ||
|
||
/// <summary> The blob name prefix. </summary> | ||
/// <remarks> | ||
/// Used to avoid accidentally mixing blobs from separate customers | ||
/// by using a separate prefix (the "realm") for each customer. | ||
/// </remarks> | ||
public string Realm { get; } | ||
|
||
/// <summary> The Azure Storage blob where the blob data is stored. </summary> | ||
public CloudBlockBlob Blob { get; } | ||
|
||
/// <see cref="IReadBlobRef.ExistsAsync"/> | ||
public Task<bool> ExistsAsync(CancellationToken cancel) => | ||
Blob.ExistsAsync(null, null, cancel); | ||
|
||
/// <see cref="IAzureReadBlobRef.GetBlob"/> | ||
public Task<CloudBlockBlob> GetBlob() => | ||
Task.FromResult(Blob); | ||
|
||
/// <see cref="IReadBlobRef.GetSizeAsync"/> | ||
public async Task<long> GetSizeAsync(CancellationToken cancel) | ||
{ | ||
// If properties are not loaded yet, they will either be null or | ||
// contain a length of -1. | ||
if (Blob.Properties != null && Blob.Properties.Length >= 0) | ||
return Blob.Properties.Length; | ||
|
||
try | ||
{ | ||
await AzureRetry.Do( | ||
c => Blob.FetchAttributesAsync(null, null, null, c), | ||
cancel).ConfigureAwait(false); | ||
return Blob.Properties.Length; | ||
} | ||
catch (StorageException e) when (e.RequestInformation.HttpStatusCode == 404) | ||
{ | ||
throw new NoSuchBlobException(Realm, Hash); | ||
} | ||
} | ||
|
||
/// <see cref="IReadBlobRef.OpenAsync"/> | ||
public async Task<Stream> OpenAsync(CancellationToken cancel) | ||
{ | ||
var size = await GetSizeAsync(cancel).ConfigureAwait(false); | ||
return new AzureReadStream(Blob, size); | ||
} | ||
|
||
/// <see cref="IAzureReadBlobRef.GetDownloadUrlAsync"/> | ||
public Task<Uri> GetDownloadUrlAsync( | ||
DateTime now, | ||
TimeSpan life, | ||
string filename, | ||
string contentType, | ||
CancellationToken cancel) | ||
{ | ||
var (asciiFilename, utf8Filename) = SanitizeFileName(filename); | ||
|
||
// This Content-Disposition header will be returned by Azure along with the blob. | ||
// It is constructed according to RFC6266 | ||
// https://tools.ietf.org/html/rfc6266 | ||
// | ||
// If the file name is ASCII-only, we use the standard for `attachment;filename="foo.tsv"` | ||
// which will cause the browser to save (because `attachment`) the blob as a file | ||
// named `foo.tsv`. | ||
// | ||
// If the file name contains non-ASCII characters, we include the `filename=` as an | ||
// ASCII-only fallback for older browsers (but it will be something like `data.tsv` | ||
// instead of the actual file name), and the `filename*=UTF-8''f%c3%a4.tsv` for modern | ||
// browsers, encoding according to RFC5987 | ||
// https://tools.ietf.org/html/rfc5987#section-3.2 | ||
|
||
var contentDisposition = | ||
utf8Filename != null | ||
? "attachment;filename=\"" + asciiFilename + "\";filename*=UTF-8''" + utf8Filename + "" | ||
: "attachment;filename=\"" + asciiFilename + "\""; | ||
|
||
var token = Blob.GetSharedAccessSignature(new SharedAccessBlobPolicy | ||
{ | ||
Permissions = SharedAccessBlobPermissions.Read, | ||
SharedAccessExpiryTime = new DateTimeOffset(now + life), | ||
SharedAccessStartTime = new DateTimeOffset(now.AddMinutes(-5)) | ||
}, new SharedAccessBlobHeaders | ||
{ | ||
ContentDisposition = contentDisposition, | ||
ContentType = contentType, | ||
ContentEncoding = null, | ||
}); | ||
|
||
return Task.FromResult(new Uri(Blob.Uri, token)); | ||
} | ||
|
||
/// <summary> Recognizes characters that will be replaced with a single '-'. </summary> | ||
private static readonly Regex BadFilenameCharacter = | ||
new Regex("[\\x00-\\x1F\\x7F/\\\\?%*:|*\"<>-]+", RegexOptions.Compiled); | ||
|
||
/// <summary> Sanitizes a file name for the content-disposition header. </summary> | ||
/// <remarks> | ||
/// Empty file names are replaced with `"data"`. | ||
/// | ||
/// Extension-only file names (like `".tsv"`) are prepended with `"data"` | ||
/// (for example `"data.tsv"`) | ||
/// | ||
/// Any characters that are not allowed in file names <see cref="BadFilenameCharacter"/> | ||
/// are replaced with `-` and consecutive `-` are combined into one. | ||
/// | ||
/// Any initial or final `-` or `.` are dropped. | ||
/// | ||
/// If the string contains any non-ASCII characters, it will return a dummy `data.ext` | ||
/// ASCII filename (keeping only the extension) and an UTF8 filename properly | ||
/// encoded according to RFC5987 (without the preceding `UTF-8''`) | ||
/// | ||
/// https://tools.ietf.org/html/rfc5987#section-3.2 | ||
/// | ||
/// If only ASCII characters are present, the returned UTF-8 filename will | ||
/// be null. | ||
/// </remarks> | ||
public static (string ascii, string utf8) SanitizeFileName(string filename) | ||
{ | ||
if (string.IsNullOrWhiteSpace(filename) || filename[0] == '.') | ||
filename = "data" + filename; | ||
|
||
filename = BadFilenameCharacter.Replace(filename, "-").Trim('-', '.'); | ||
|
||
if (filename == "") | ||
return ("data", null); | ||
|
||
if (filename.All(c => c < 127)) | ||
return (filename, null); | ||
|
||
// Found UTF8 characters. | ||
|
||
var utf8 = new StringBuilder(); | ||
var bytes = Encoding.UTF8.GetBytes(filename); | ||
foreach (var b in bytes) | ||
{ | ||
if (b >= 'a' && b <= 'z' || | ||
b >= 'A' && b <= 'Z' || | ||
b >= '0' && b <= '9' || | ||
b < 127 && ( | ||
b == '!' || | ||
b == '#' || | ||
b == '$' || | ||
b == '+' || | ||
b == '-' || | ||
b == '.' || | ||
b == '^' || | ||
b == '_' || | ||
b == '`' || | ||
b == '|' || | ||
b == '~')) | ||
{ | ||
// Characters explicitly allowed by RFC5987 may be kept verbatim, | ||
// all others are %-escaped. | ||
utf8.Append((char)b); | ||
} | ||
else | ||
{ | ||
utf8.Append($"%{b:x2}"); | ||
} | ||
} | ||
|
||
var ext = Path.GetExtension(filename) ?? ".bin"; | ||
if (ext == ".gz") ext = ".csv.gz"; | ||
|
||
return ("data" + ext, utf8.ToString()); | ||
} | ||
} | ||
} |
Oops, something went wrong.