diff --git a/compress.py b/compress.py index ece4f50..3565501 100755 --- a/compress.py +++ b/compress.py @@ -30,15 +30,24 @@ def dequantize_8bit(quantized, min_val, max_val): def main(args): vecs = np.load(args.vectors) + + n, dim = vecs.shape + if dim != 300: + from sklearn.decomposition import PCA + vecs = PCA(n_components=300).fit_transform(vecs) + # vecs /= np.linalg.norm(vecs, axis=1, keepdims=True) with open(args.words) as file: words = file.readlines() - good_words = set() - print(args.f) - for path in args.f: - with open(path) as file: - good_words |= {line.lower().strip() for line in file} + if not args.f: + good_words = set(words) + else: + good_words = set() + print(args.f) + for path in args.f: + with open(path) as file: + good_words |= {line.lower().strip() for line in file} print(f'{len(good_words)=}') print(list(good_words)[:3]) @@ -75,7 +84,7 @@ def main(args): if merr < best_err: best_err = merr best_alpha = alpha - # print(f"{alpha}, Mean error: {merr}") + print(f"{alpha}, Mean error: {merr}") print(f"{best_alpha=}") compressed, min_val, max_val = quantize_8bit(x, best_alpha) diff --git a/website/game.js b/website/game.js index d2193d2..0df16ca 100644 --- a/website/game.js +++ b/website/game.js @@ -8,7 +8,8 @@ if (!isIOSVersionAtLeast(16)) { // Start heavy promises const root = '/website/model'; const prom = Promise.all([ - fetchVectors(root + '/vecs.gz'), + //fetchVectors(root + '/vecs.gz'), + fetchVectors(root + '/angel.gz'), fetchWordsGz(root + '/words.gz'), fetchWords(root + '/stopwords') ]); @@ -565,29 +566,27 @@ function fetchVectors(path) { }) .then(decompressedBuffer => { const dim = 300; - //const rows = 20000; const rows = 9910; const byteArray = new Uint8Array(decompressedBuffer); - // console.log(byteArray); - const min_val=-2.645588700353074; - const max_val=2.6333964024164196; - //min=-2.6348934823495345, max=2.6430343918786767 + + // Glove: + //const min_val=-2.645588700353074; + //const max_val=2.6333964024164196; + + // Angel: + const min_val=-3.508529352673804; + const max_val=4.6301482913369485; + + // Dequantize const quantizedMatrix = mlMatrix.Matrix.from1DArray(rows, dim, byteArray); let matrix = quantizedMatrix.div(255).mul(max_val - min_val).add(min_val); - //console.log(matrix); - //const norms = matrix.mul(matrix).sum('row').sqrt(); - + // Normalize for (let i = 0; i < matrix.rows; i++) { let row = matrix.getRow(i); let norm = Math.sqrt(row.reduce((sum, value) => sum + value * value, 0)); matrix.setRow(i, row.map(value => value / norm)); } - // console.log("TOP ROW"); - // console.log(matrix.getRow(0)); - - - // console.log(matrix); return matrix; }) .catch(error => console.error('Error loading file:', error)); diff --git a/website/model/angel.gz b/website/model/angel.gz new file mode 100644 index 0000000..652956f Binary files /dev/null and b/website/model/angel.gz differ diff --git a/website/model/stopwords b/website/model/stopwords index 123d8bf..6c63463 100644 --- a/website/model/stopwords +++ b/website/model/stopwords @@ -100,7 +100,6 @@ crow lesbian jerk breast -finger prick trash gay @@ -125,10 +124,8 @@ snatch slit nipple tramp -meat ape retard bum -wound trey sexist diff --git a/website/styles.css b/website/styles.css index 3550335..62834a5 100644 --- a/website/styles.css +++ b/website/styles.css @@ -71,6 +71,9 @@ h2 { ul, h3 { margin-bottom: 1rem; } +ul { + padding-left: 1rem; +} h3 { font-weight: normal; } @@ -475,3 +478,4 @@ body:not(.today) .is-today { left: 50%; transform: translate(-50%, -110%); } +