Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Bloom filter to KV reads/writes #43

Closed
wants to merge 9 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ cfg-if = "0.1.2"
wasm-bindgen = "0.2"
js-sys = "0.3.8"
imageproc = { version = "0.22.0", default-features = false }
twox-hash = "1.6.2"

# The `console_error_panic_hook` crate provides better debugging of panics by
# logging them with `console.error`. This is great for development, but requires
Expand Down
10,156 changes: 0 additions & 10,156 deletions package-lock.json

This file was deleted.

3 changes: 1 addition & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
"@cloudflare/kv-asset-handler": "^0.1.1",
"@types/base64-js": "^1.2.5",
"@types/webpack-env": "^1.15.0",
"base64-js": "^1.3.1",
"bloom-filters": "^1.3.4"
"base64-js": "^1.3.1"
}
}
10 changes: 10 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
extern crate cfg_if;
extern crate image;
extern crate wasm_bindgen;
extern crate twox_hash;

mod utils;
mod skin;

use std::hash::{Hash,Hasher};
use cfg_if::cfg_if;
use js_sys::Uint8Array;
use wasm_bindgen::prelude::*;
use skin::*;
use image::DynamicImage;
use twox_hash::{XxHash32};

cfg_if! {
// When the `wee_alloc` feature is enabled, use `wee_alloc` as the global
Expand Down Expand Up @@ -89,4 +92,11 @@ pub fn get_rendered_image(skin_image: Uint8Array, size: u32, what: String, armor
return Err(js_sys::Error::new("Couldn't load skin.").into());
}
}
}

#[wasm_bindgen]
pub fn xxhash(value: String, seed: u32) -> u64 {
let mut hasher = XxHash32::with_seed(seed);
value.hash(&mut hasher);
hasher.finish()
}
4 changes: 2 additions & 2 deletions worker/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import { getAssetFromKV } from "@cloudflare/kv-asset-handler"
import {interpretRequest, CraftheadRequest, RequestedKind} from './request';
import MojangRequestService from './services/mojang/service';
import {getRenderer} from './wasm';
import {getWASMModule} from './wasm';
import PromiseGatherer from "./promise_gather";
import {CachedMojangApiService, DirectMojangApiService} from "./services/mojang/api";
import { default as CACHE_BUST } from './util/cache-bust';
Expand Down Expand Up @@ -127,7 +127,7 @@ async function renderImage(skin: Response, request: CraftheadRequest): Promise<R
const {size, requested, armored} = request;
const destinationHeaders = new Headers(skin.headers);
const slim = destinationHeaders.get('X-Crafthead-Skin-Model') === 'slim';
const [renderer, skinArrayBuffer] = await Promise.all([getRenderer(), skin.arrayBuffer()]);
const [renderer, skinArrayBuffer] = await Promise.all([getWASMModule(), skin.arrayBuffer()]);
const skinBuf = new Uint8Array(skinArrayBuffer);

let which: string
Expand Down
23 changes: 10 additions & 13 deletions worker/services/mojang/api.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import PromiseGatherer from "../../promise_gather";
import { CacheComputeResult } from "../../util/cache-helper";
import { default as CACHE_BUST } from '../../util/cache-bust';

declare const CRAFTHEAD_PROFILE_CACHE: KVNamespace;
import { KVManager } from '../../util/kv-manager';

export interface MojangProfile {
id: string;
Expand Down Expand Up @@ -43,21 +42,20 @@ export class CachedMojangApiService implements MojangApiService {
return localCacheResult.json();
}

const kvResult: MojangUsernameLookupResult | null = await CRAFTHEAD_PROFILE_CACHE.get('username-lookup:' + lowercased, 'json');
const kvResult = await KVManager.get('username-lookup:' + lowercased);
if (kvResult) {
gatherer?.push(caches.default.put(new Request(localCacheKey), new Response(
JSON.stringify(kvResult), { headers: { 'Cache-Control': 'max-age=3600', 'Content-Type': 'application/json' }}
kvResult, { headers: { 'Cache-Control': 'max-age=3600', 'Content-Type': 'application/json' }}
)));
return kvResult;
return JSON.parse(kvResult);
}

const lookup = await this.delegate.lookupUsername(lowercased, gatherer);
if (lookup) {
gatherer?.push(
CRAFTHEAD_PROFILE_CACHE.put(
KVManager.put(
'username-lookup:' + lowercased,
JSON.stringify(lookup),
{ expirationTtl: 86400 }
JSON.stringify(lookup)
)
);
gatherer?.push(caches.default.put(new Request(localCacheKey), new Response(
Expand All @@ -69,21 +67,20 @@ export class CachedMojangApiService implements MojangApiService {
}

async fetchProfile(id: string, gatherer: PromiseGatherer | null): Promise<CacheComputeResult<MojangProfile | null>> {
const kvResult: MojangProfile | null = await CRAFTHEAD_PROFILE_CACHE.get('profile-lookup:' + id, 'json');
const kvResult = await KVManager.get('profile-lookup:' + id);
if (kvResult !== null) {
return {
result: kvResult,
result: JSON.parse(kvResult),
source: 'cf-kv'
};
}

const lookup = await this.delegate.fetchProfile(id, gatherer);
if (lookup) {
gatherer?.push(
CRAFTHEAD_PROFILE_CACHE.put(
KVManager.put(
'profile-lookup:' + id,
JSON.stringify(lookup.result),
{ expirationTtl: 86400 }
JSON.stringify(lookup.result)
)
);
}
Expand Down
72 changes: 72 additions & 0 deletions worker/util/bloom-filter.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import PromiseGatherer from '../promise_gather';
import { getWASMModule } from '../wasm';
import { KVDirect, KVExpiration } from './kv-manager';

const epsilon = 0.05; // False positive tolerance
const n = 5; // Avg. items in Bloom filter per day

export class BloomFilter {
private static seed = 8149214274; // Randomly chosen, nothing special

// See https://en.wikipedia.org/wiki/Bloom_filter#Optimal_number_of_hash_functions
private static m = Math.ceil(-((n * Math.log(epsilon)) / (Math.log(2) ** 2)));
private static k = Math.ceil(-(Math.log(epsilon) / Math.log(2)));

static async add(element: string): Promise<void> {
if (await KVDirect.get('bloom:0') === null) { // Check if Bloom filter exists in KV
await this.allocate();
}

const gatherer = new PromiseGatherer();

const indexes = await this.getIndexes(element);
for (const index of indexes) {
gatherer.push(KVDirect.put('bloom:' + index, '1', KVExpiration.TIMED));
}

return gatherer.all();
}

static async has(element: string): Promise<boolean> {
const indexes = await this.getIndexes(element);
for (const index of indexes) {
const value = await KVDirect.get('bloom:' + index)

if (value === null) {
await this.allocate();
return false;
} else if (value === '0') {
return false;
}
}

return true;
}

// See https://willwhim.wpengine.com/2011/09/03/producing-n-hash-functions-by-hashing-only-once/
private static async getIndexes(element: string): Promise<Set<number>> {
const [a, b] = await this.doubleHash(element);
let indexes = new Set<number>();
for (let i = 0; i < this.k; i++) {
indexes.add((a + b * i) % this.m);
}
return indexes;
}

private static async doubleHash(value: string): Promise<number[]> {
const wasm = await getWASMModule();
return [ Number(wasm.xxhash(value, this.seed)), Number(wasm.xxhash(value, this.seed + 1)) ];
}

private static async allocate(): Promise<void> {
const gatherer = new PromiseGatherer();

for (let i = 0; i < this.m; i++) {
gatherer.push(
KVDirect.put('bloom:' + i, '0', KVExpiration.TIMED)
);
}

return gatherer.all();
}
}
4 changes: 0 additions & 4 deletions worker/util/cache-helper.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,4 @@
import PromiseGatherer from "../promise_gather";

export interface CacheComputeResult<T> {
result: T;
source: string;
}

declare const CRAFTHEAD_PROFILE_CACHE: KVNamespace;
48 changes: 48 additions & 0 deletions worker/util/kv-manager.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import { BloomFilter } from './bloom-filter';

declare const KV_CACHE_NAMESPACE: KVNamespace;

export enum KVExpiration {
PERIODIC,
TIMED,
NONE
}

export class KVManager {
static async get(key: string): Promise<string | null> {
return KVDirect.get(key);
}

static async put(key: string, value: string): Promise<void> {
const seen_before = await BloomFilter.has(key);
if (seen_before && await KVDirect.get(key) === null) {
return KVDirect.put(key, value, KVExpiration.PERIODIC);
}

return BloomFilter.add(key);
}
}

export class KVDirect {
private static namespace = KV_CACHE_NAMESPACE;

static async get(key: string): Promise<string|null> {
return this.namespace.get(key);
}

static async put(key: string, value: string, expiration = KVExpiration.NONE): Promise<void> {
switch (expiration) {
case KVExpiration.PERIODIC:
return this.namespace.put(key, value, { expirationTtl: 86400 });
case KVExpiration.TIMED:
const expirationTime = new Date();
expirationTime.setUTCDate(new Date().getUTCDate() + 1);
expirationTime.setHours(0, 0, 0);
const expirationEpoch = Math.floor(expirationTime.getTime() / 1000);
return this.namespace.put(key, value, { expiration: expirationEpoch });
case KVExpiration.NONE:
default:
return this.namespace.put(key, value);
}
}
}
3 changes: 2 additions & 1 deletion worker/wasm.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
// This is a hack in order to get webpack to play nice with the included WebAssembly module.
// See https://github.com/rustwasm/wasm-bindgen/issues/700 for more details.
export async function getRenderer(): Promise<{
export async function getWASMModule(): Promise<{
get_rendered_image(skin_image: any, size: number, type: string, armored: boolean, slim: boolean): any;
xxhash(value: string, seed: number): bigint;
}> {
return new Promise((resolve, reject) => {
// We intentionally ignore the erros here. We know for a fact we are using webpack targeting CommonJS,
Expand Down
Loading