Skip to content

Commit

Permalink
New vectors
Browse files (browse the repository at this point in the history)
  • Loading branch information
thomasahle committed Dec 4, 2023
1 parent 3d9186f commit 4fbbd67
Show file tree
Hide file tree
Showing 5 changed files with 32 additions and 23 deletions.
21 changes: 15 additions & 6 deletions compress.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,24 @@ def dequantize_8bit(quantized, min_val, max_val):

def main(args):
vecs = np.load(args.vectors)

n, dim = vecs.shape
if dim != 300:
from sklearn.decomposition import PCA
vecs = PCA(n_components=300).fit_transform(vecs)

# vecs /= np.linalg.norm(vecs, axis=1, keepdims=True)
with open(args.words) as file:
words = file.readlines()

good_words = set()
print(args.f)
for path in args.f:
with open(path) as file:
good_words |= {line.lower().strip() for line in file}
if not args.f:
good_words = set(words)
else:
good_words = set()
print(args.f)
for path in args.f:
with open(path) as file:
good_words |= {line.lower().strip() for line in file}
print(f'{len(good_words)=}')
print(list(good_words)[:3])

Expand Down Expand Up @@ -75,7 +84,7 @@ def main(args):
if merr < best_err:
best_err = merr
best_alpha = alpha
# print(f"{alpha}, Mean error: {merr}")
print(f"{alpha}, Mean error: {merr}")
print(f"{best_alpha=}")

compressed, min_val, max_val = quantize_8bit(x, best_alpha)
Expand Down
27 changes: 13 additions & 14 deletions website/game.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ if (!isIOSVersionAtLeast(16)) {
// Start heavy promises
const root = '/website/model';
const prom = Promise.all([
fetchVectors(root + '/vecs.gz'),
//fetchVectors(root + '/vecs.gz'),
fetchVectors(root + '/angel.gz'),
fetchWordsGz(root + '/words.gz'),
fetchWords(root + '/stopwords')
]);
Expand Down Expand Up @@ -565,29 +566,27 @@ function fetchVectors(path) {
})
.then(decompressedBuffer => {
const dim = 300;
//const rows = 20000;
const rows = 9910;
const byteArray = new Uint8Array(decompressedBuffer);
// console.log(byteArray);
const min_val=-2.645588700353074;
const max_val=2.6333964024164196;
//min=-2.6348934823495345, max=2.6430343918786767

// Glove:
//const min_val=-2.645588700353074;
//const max_val=2.6333964024164196;

// Angel:
const min_val=-3.508529352673804;
const max_val=4.6301482913369485;

// Dequantize
const quantizedMatrix = mlMatrix.Matrix.from1DArray(rows, dim, byteArray);
let matrix = quantizedMatrix.div(255).mul(max_val - min_val).add(min_val);

//console.log(matrix);
//const norms = matrix.mul(matrix).sum('row').sqrt();

// Normalize
for (let i = 0; i < matrix.rows; i++) {
let row = matrix.getRow(i);
let norm = Math.sqrt(row.reduce((sum, value) => sum + value * value, 0));
matrix.setRow(i, row.map(value => value / norm));
}
// console.log("TOP ROW");
// console.log(matrix.getRow(0));


// console.log(matrix);
return matrix;
})
.catch(error => console.error('Error loading file:', error));
Expand Down
Binary file added website/model/angel.gz
Binary file not shown.
3 changes: 0 additions & 3 deletions website/model/stopwords
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,6 @@ crow
lesbian
jerk
breast
finger
prick
trash
gay
Expand All @@ -125,10 +124,8 @@ snatch
slit
nipple
tramp
meat
ape
retard
bum
wound
trey
sexist
4 changes: 4 additions & 0 deletions website/styles.css
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,9 @@ h2 {
ul, h3 {
margin-bottom: 1rem;
}
ul {
padding-left: 1rem;
}
h3 {
font-weight: normal;
}
Expand Down Expand Up @@ -475,3 +478,4 @@ body:not(.today) .is-today {
left: 50%;
transform: translate(-50%, -110%);
}

0 comments on commit 4fbbd67

Please sign in to comment.