Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Push Files to Index from Obsidian, Emacs & Desktop Clients using Multi-Part Forms Method #499

Merged
merged 24 commits into from
Oct 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
6aa69da
Put indexer API endpoint under /api path segment
debanjum Oct 10, 2023
9ba173b
Improve emoji, message on content index updated via logger
debanjum Oct 12, 2023
60e9a61
Use multi-part form to receive files to index on server
debanjum Oct 12, 2023
68018ef
Use multi-part form to send files to index on desktop client
debanjum Oct 12, 2023
fc99431
Send files to index on server from the khoj.el emacs client
debanjum Oct 12, 2023
bed3aff
Update tests to test multi-part/form method of pushing files to index
debanjum Oct 12, 2023
292f042
Send content for indexing on server at a regular interval from khoj.el
debanjum Oct 13, 2023
bea196a
Explicitly make GET request to /config/data from khoj.el:khoj-server-…
debanjum Oct 13, 2023
b669aa2
Clean and fix the content indexing code in the Emacs client
debanjum Oct 14, 2023
f64fa06
Initialize the Khoj Transient menu on first run instead of load
debanjum Oct 14, 2023
79b3f82
Make khoj.el send files to be deleted from index to server
debanjum Oct 17, 2023
6baaaaf
Test request body of multi-part form to update content index from kho…
debanjum Oct 17, 2023
f2e293a
Push Vault files to index to Khoj server using Khoj Obsidian plugin
debanjum Oct 17, 2023
8e627a5
Pass any files to be deleted to indexer API via Khoj Obsidian plugin
debanjum Oct 17, 2023
d27dc71
Use encoding of each file set in indexer request to read file
debanjum Oct 17, 2023
541cd59
Let fs_syncer pass PDF files directly as binary before indexing
debanjum Oct 17, 2023
99a2c93
Add CORS policy to allow requests from khoj apps, obsidian & localhost
debanjum Oct 17, 2023
13a3122
Stop configuring server to pull files to index from Obsidian client
debanjum Oct 17, 2023
05be6bd
Clicking Update Index in Obsidian settings should push files to index
debanjum Oct 17, 2023
e347823
Log telemetry for index updates via push to API endpoint
debanjum Oct 17, 2023
84654ff
Update indexer API endpoint URL to index/update from indexer/batch
debanjum Oct 17, 2023
5efae1a
Update indexer API endpoint query params for force, content type
debanjum Oct 17, 2023
6a4f1b2
Add more client, request details in logs by index/update API endpoint
debanjum Oct 17, 2023
7b1c62b
Mark test_get_configured_types_via_api unit test as flaky
debanjum Oct 17, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ dependencies = [
"dateparser >= 1.1.1",
"defusedxml == 0.7.1",
"fastapi == 0.77.1",
"python-multipart >= 0.0.5",
"jinja2 == 3.1.2",
"openai >= 0.27.0, < 1.0.0",
"tiktoken >= 0.3.2",
Expand Down
137 changes: 66 additions & 71 deletions src/interface/desktop/main.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ const {dialog} = require('electron');

const cron = require('cron').CronJob;
const axios = require('axios');
const { Readable } = require('stream');

const KHOJ_URL = 'http://127.0.0.1:42110'

Expand Down Expand Up @@ -65,7 +64,7 @@ const schema = {

var state = {}

const store = new Store({schema});
const store = new Store({ schema });

console.log(store);

Expand All @@ -86,37 +85,48 @@ function handleSetTitle (event, title) {
});
}

/**
 * Guess the MIME type of a file from its filename extension.
 *
 * The extension is whatever follows the last '.' in the name and is compared
 * case-sensitively. When the name contains no '.', the whole name is compared.
 * Anything unrecognized falls back to 'text/plain'.
 *
 * @param {string} filename - File name or path to inspect.
 * @returns {string} The MIME type associated with the extension.
 */
function filenameToMimeType (filename) {
    // A Map (rather than a plain object) keeps lookups free of inherited
    // keys such as "constructor", so those still resolve to the default.
    const mimeTypeByExtension = new Map([
        ['pdf', 'application/pdf'],
        ['png', 'image/png'],
        ['jpg', 'image/jpeg'],
        ['jpeg', 'image/jpeg'],
        ['md', 'text/markdown'],
        ['markdown', 'text/markdown'],
        ['org', 'text/org'],
    ]);

    const segments = filename.split('.');
    const suffix = segments[segments.length - 1];

    if (mimeTypeByExtension.has(suffix)) {
        return mimeTypeByExtension.get(suffix);
    }
    return 'text/plain';
}

function pushDataToKhoj (regenerate = false) {
let filesToPush = [];
const files = store.get('files');
const folders = store.get('folders');
state = {
completed: true
}
const files = store.get('files') || [];
const folders = store.get('folders') || [];
state = { completed: true }

if (files) {
for (file of files) {
filesToPush.push(file.path);
}
for (const file of files) {
filesToPush.push(file.path);
}
if (folders) {
for (folder of folders) {
const files = fs.readdirSync(folder.path, { withFileTypes: true });
for (file of files) {
if (file.isFile() && validFileTypes.includes(file.name.split('.').pop())) {
filesToPush.push(path.join(folder.path, file.name));
}

for (const folder of folders) {
const files = fs.readdirSync(folder.path, { withFileTypes: true });
for (const file of files) {
if (file.isFile() && validFileTypes.includes(file.name.split('.').pop())) {
filesToPush.push(path.join(folder.path, file.name));
}
}
}

let data = {
files: []
}

const lastSync = store.get('lastSync') || [];

for (file of filesToPush) {
const formData = new FormData();
for (const file of filesToPush) {
const stats = fs.statSync(file);
if (!regenerate) {
if (stats.mtime.toISOString() < lastSync.find((syncedFile) => syncedFile.path === file)?.datetime) {
Expand All @@ -125,18 +135,10 @@ function pushDataToKhoj (regenerate = false) {
}

try {
let rawData;
// If the file is a PDF or IMG file, read it as a binary file
if (binaryFileTypes.includes(file.split('.').pop())) {
rawData = fs.readFileSync(file).toString('base64');
} else {
rawData = fs.readFileSync(file, 'utf8');
}

data.files.push({
path: file,
content: rawData
});
encoding = binaryFileTypes.includes(file.split('.').pop()) ? "binary" : "utf8";
mimeType = filenameToMimeType(file) + (encoding === "utf8" ? "; charset=UTF-8" : "");
fileObj = new Blob([fs.createReadStream(file, encoding)], { type: mimeType });
formData.append('files', fileObj, file);
state[file] = {
success: true,
}
Expand All @@ -151,44 +153,37 @@ function pushDataToKhoj (regenerate = false) {

for (const syncedFile of lastSync) {
if (!filesToPush.includes(syncedFile.path)) {
data.files.push({
path: syncedFile.path,
content: ""
});
fileObj = new Blob([""], { type: filenameToMimeType(syncedFile.path) });
debanjum marked this conversation as resolved.
Show resolved Hide resolved
formData.append('files', fileObj, syncedFile.path);
}
}

const headers = { 'x-api-key': 'secret', 'Content-Type': 'application/json' };

const stream = new Readable({
read() {
this.push(JSON.stringify(data));
this.push(null);
}
});

const hostURL = store.get('hostURL') || KHOJ_URL;

axios.post(`${hostURL}/v1/indexer/batch?regenerate=${regenerate}`, stream, { headers })
.then(response => {
console.log(response.data);
const win = BrowserWindow.getAllWindows()[0];
win.webContents.send('update-state', state);
let lastSync = [];
for (const file of filesToPush) {
lastSync.push({
path: file,
datetime: new Date().toISOString()
});
}
store.set('lastSync', lastSync);
})
.catch(error => {
console.error(error);
state['completed'] = false
const win = BrowserWindow.getAllWindows()[0];
win.webContents.send('update-state', state);
});
if (!!formData?.entries()?.next().value) {
const hostURL = store.get('hostURL') || KHOJ_URL;
const headers = {
'x-api-key': 'secret'
};
axios.post(`${hostURL}/api/v1/index/update?force=${regenerate}&client=desktop`, formData, { headers })
.then(response => {
console.log(response.data);
const win = BrowserWindow.getAllWindows()[0];
win.webContents.send('update-state', state);
let lastSync = [];
for (const file of filesToPush) {
lastSync.push({
path: file,
datetime: new Date().toISOString()
});
}
store.set('lastSync', lastSync);
})
.catch(error => {
console.error(error);
state['completed'] = false
const win = BrowserWindow.getAllWindows()[0];
win.webContents.send('update-state', state);
});
}
}

pushDataToKhoj();
Expand Down
Loading
Loading