forked from ankane/neighbor
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathexample.rb
55 lines (47 loc) · 1.47 KB
/
example.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
require "bundler/setup"
require "json"
require "net/http"
require "active_record"
require "neighbor"
# Connect to the local PostgreSQL database used by this example.
ActiveRecord::Base.establish_connection(adapter: "postgresql", database: "neighbor_test")

# Turn off verbose schema output while the table is (re)built.
ActiveRecord::Schema.verbose = false

ActiveRecord::Schema.define do
  enable_extension("vector")

  # force: true recreates the table on every run, so each run starts empty.
  create_table(:documents, force: true) do |table|
    table.text(:content)
    # Bit column holding one binary embedding per row.
    # NOTE(review): 1024 presumably equals the bit length of the embeddings
    # inserted below — confirm against the embedding model's dimension.
    table.bit(:embedding, limit: 1024)
  end
end
# ActiveRecord model for the example's stored documents.
class Document < ActiveRecord::Base
  # Registers the embedding attribute with the neighbor gem; the script
  # queries it via Document.nearest_neighbors(:embedding, ...).
  has_neighbors :embedding
end
# Requests unsigned-binary ("ubinary") embeddings for a batch of texts from
# the Cohere embed endpoint and converts each one into a bit string.
#
# API reference: https://docs.cohere.com/reference/embed
#
# @param input [Array<String>] texts to embed
# @param input_type [String] value sent as the request's input_type,
#   e.g. "search_document" or "search_query"
# @return [Array<String>] one string of "0"/"1" characters per embedding,
#   eight characters for every byte value in the response
# @raise [KeyError] if the CO_API_KEY environment variable is not set
# @raise [Net::HTTPExceptions] if the response status is not 2xx
def fetch_embeddings(input, input_type)
  endpoint = URI("https://api.cohere.com/v1/embed")
  request_headers = {
    "Authorization" => "Bearer #{ENV.fetch("CO_API_KEY")}",
    "Content-Type" => "application/json"
  }
  payload = {
    texts: input,
    model: "embed-english-v3.0",
    input_type: input_type,
    embedding_types: ["ubinary"]
  }

  response = Net::HTTP.post(endpoint, payload.to_json, request_headers)
  # Net::HTTPResponse#value raises unless the status is 2xx.
  response.value

  byte_vectors = JSON.parse(response.body)["embeddings"]["ubinary"]
  byte_vectors.map do |bytes|
    # Each entry is a byte value; expand it to its 8-character bit pattern
    # (most significant bit first) and concatenate.
    bytes.map { |byte| byte.chr.unpack1("B*") }.join
  end
end
# Sample texts to index.
texts = [
  "The dog is barking",
  "The cat is purring",
  "The bear is growling"
]

# Embed the texts as documents and pair each one with its bit string.
bit_strings = fetch_embeddings(texts, "search_document")
rows = texts.zip(bit_strings).map do |text, bits|
  {content: text, embedding: bits}
end
Document.insert_all!(rows)

# Embed the query with the "search_query" input type, then print the content
# of up to five nearest documents by Hamming distance.
search_text = "forest"
search_bits = fetch_embeddings([search_text], "search_query").first
nearest = Document.nearest_neighbors(:embedding, search_bits, distance: "hamming").first(5)
pp nearest.map(&:content)