-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPanversal-ChromaDB-client-integration.js
219 lines (199 loc) · 8.19 KB
/
Panversal-ChromaDB-client-integration.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
/**
* Universal ChromaDB Integration Template for Node.js
*
* This script is a comprehensive guide for integrating ChromaDB with various document sources and credential management options.
* It is designed to cater to users with varying levels of coding experience. The script includes sections for fetching documents
* from Local Files, Google Drive, OneDrive, Dropbox, iCloud, Web, Databases and includes credential management options using .env
* files or Key Vault Managers like Google Vault, AWS Key Management Services, Azure Key Vault, Hashicorp Vault, etc.
*
* Before proceeding, ensure you have installed the necessary Node.js packages:
* - chromadb (assuming there's a Node.js library for ChromaDB)
* - dotenv (for .env approach)
* - googleapis (for Google Drive integration)
* - axios (for fetching documents from the web)
* - The respective libraries if you're utilizing a Key-Vault Manager approach
* - Other libraries as needed based on your document sources and credential management choices
*
* The script allows for pulling data from multiple sources simultaneously, as long as the corresponding
* sections are uncommented and properly configured. Each section is independent, so you can activate as many or as few as needed
* for your use case.
*
* Incorporating Key Vault Managers can enhance security by managing sensitive information like credentials and API keys.
*
* Please note that for Apple's iCloud, accessing documents programmatically may be more complex due to Apple's security measures,
* and it may require additional steps not covered in this template.
*/
// Load the environment variables from the .env file (if using .env approach)
require('dotenv').config();
// Import additional libraries as needed based on your document source and key management strategy
const chromadb = require('chromadb'); // Assuming chromadb has a Node.js library
const fs = require('fs');
const path = require('path');
const {google} = require('googleapis');
const axios = require('axios');
// Load your ChromaDB Client
const chromaClient = new chromadb.Client();
// Create or access your collection
const collectionName = "your_collection_name"; // Replace with your actual collection name
let collection;
try {
collection = chromaClient.createCollection(collectionName);
} catch (error) {
collection = chromaClient.getCollection(collectionName);
}
// Initialize arrays to store documents, metadatas, and ids
const documents = [];
const metadatas = [];
const ids = [];
// Credential Management Options
// This template includes placeholders for integrating various Key Vault Managers.
// Replace these with the actual implementation details specific to your chosen
// Key Vault Manager. Each Key Vault Manager has its own methods for accessing
// and retrieving secrets, so you'll need to refer to their respective documentation
// for precise implementation details.
// Uncomment and configure the sections as needed based on your document source and key management strategy
// ---------------------------
// Local Files
// ---------------------------
const directoryPath = "path_to_your_documents"; // Replace with the path to your documents
fs.readdirSync(directoryPath).forEach(filename => {
if (filename.endsWith(".txt")) { // or .pdf, .docx, etc. based on your files
const filePath = path.join(directoryPath, filename);
// Read the file content
const content = fs.readFileSync(filePath, 'utf-8');
documents.push(content);
metadatas.push({"filename": filename}); // Add any other metadata you might have
ids.push(filename); // Or any other unique identifier
}
});
// ---------------------------
// Google Drive (requires setup)
// ---------------------------
const credentials = require('./credentials.json'); // Replace with the path to your credentials file
const scopes = ['https://www.googleapis.com/auth/drive'];
const auth = new google.auth.JWT(
credentials.client_email, null,
credentials.private_key, scopes
);
const drive = google.drive({version: 'v3', auth});
// List files from Google Drive
drive.files.list({
pageSize: 10,
fields: 'files(id, name)',
}, (err, res) => {
if (err) return console.error('The API returned an error:', err);
const files = res.data.files;
if (files.length) {
// Process files
} else {
console.log('No files found.');
}
});
// ---------------------------
// Fetch from Web
// ---------------------------
const urls = ["http://example.com/document1", "http://example.com/document2"]; // Replace with your document URLs
urls.forEach(async (url) => {
const response = await axios.get(url);
documents.push(response.data);
metadatas.push({"url": url});
ids.push(url); // Or any other unique identifier
});
// ---------------------------
// OneDrive (requires setup)
// ---------------------------
const { Client } = require('@microsoft/microsoft-graph-client');
require('isomorphic-fetch');
const authProvider = {
async getAccessToken() {
// Implement getting an access token
return process.env.ONEDRIVE_ACCESS_TOKEN;
}
};
const client = Client.initWithMiddleware({ authProvider });
async function listOneDriveFiles() {
try {
const response = await client.api('/me/drive/root/children').get();
response.value.forEach(file => {
// Download and read file content
const content = await client.api(`/me/drive/items/${file.id}/content`).get();
documents.push(content);
metadatas.push({"onedrive_id": file.id});
ids.push(file.id);
});
} catch (error) {
console.error(error);
}
}
listOneDriveFiles();
// ---------------------------
// Dropbox (requires setup)
// ---------------------------
const Dropbox = require('dropbox').Dropbox;
const dbx = new Dropbox({ accessToken: process.env.DROPBOX_ACCESS_TOKEN });
async function listDropboxFiles() {
try {
const response = await dbx.filesListFolder({ path: '' });
response.result.entries.forEach(async (entry) => {
const metadata = await dbx.filesDownload({ path: entry.path_lower });
const content = metadata.result.fileBinary;
documents.push(content);
metadatas.push({"dropbox_id": metadata.result.id});
ids.push(metadata.result.id);
});
} catch (error) {
console.error(error);
}
}
listDropboxFiles();
// ---------------------------
// Apple's iCloud (requires setup)
// ---------------------------
// Note: Accessing iCloud via third-party services can be complex due to Apple's security measures.
// The following is a hypothetical example and may not work without additional setup and authentication.
const iCloud = require('some-icloud-library'); // Replace with an actual Node.js library for iCloud
const icloud = new iCloud(process.env.APPLE_ID, process.env.ICLOUD_PASSWORD);
async function listICloudFiles() {
try {
const files = await icloud.drive['Documents'].list();
files.forEach(async (file) => {
const content = await icloud.drive.download(file);
documents.push(content);
metadatas.push({"icloud_id": file.id});
ids.push(file.id);
});
} catch (error) {
console.error(error);
}
}
listICloudFiles();
// ---------------------------
// Database (generic SQL)
// ---------------------------
const { Pool } = require('pg');
const pool = new Pool({
user: process.env.DB_USER,
host: process.env.DB_HOST,
database: process.env.DB_NAME,
password: process.env.DB_PASSWORD,
port: process.env.DB_PORT,
});
async function listDatabaseDocuments() {
try {
const response = await pool.query('SELECT document_content, document_id, document_metadata FROM your_table');
response.rows.forEach(row => {
documents.push(row.document_content);
metadatas.push({"source": "your_database", "additional_metadata": row.document_metadata});
ids.push(row.document_id);
});
} catch (error) {
console.error(error);
}
}
listDatabaseDocuments();
// Query the collection (optional, for testing)
const results = collection.query(
["This is a query document"], // query_texts
2 // n_results
);
console.log(results);