Skip to content

Commit

Permalink
remove old external indexing functionality from lantern_cli
Browse files Browse the repository at this point in the history
  • Loading branch information
var77 committed Nov 4, 2024
1 parent cad8cb3 commit 8c40d36
Show file tree
Hide file tree
Showing 23 changed files with 76 additions and 1,480 deletions.
2 changes: 1 addition & 1 deletion ci/scripts/build-linux.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ function setup_postgres() {
# Fix pg_config (sometimes it points to wrong version)
rm -f /usr/bin/pg_config && ln -s /usr/lib/postgresql/$PG_VERSION/bin/pg_config /usr/bin/pg_config
# preload pg_cron, necessary for async tasks test
echo "shared_preload_libraries = 'pg_cron' " >> /etc/postgresql/$PG_VERSION/main/postgresql.conf
echo "shared_preload_libraries = 'pg_cron,lantern_extras' " >> /etc/postgresql/$PG_VERSION/main/postgresql.conf
# Enable auth without password
echo "local all all trust" > /etc/postgresql/$PG_VERSION/main/pg_hba.conf
echo "host all all 127.0.0.1/32 trust" >> /etc/postgresql/$PG_VERSION/main/pg_hba.conf
Expand Down
6 changes: 2 additions & 4 deletions lantern_cli/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "lantern_cli"
version = "0.4.1"
version = "0.4.2"
edition = "2021"

[[bin]]
Expand Down Expand Up @@ -37,7 +37,6 @@ image = { version = "0.25.4", features = ["jpeg", "png", "webp" ], optional = tr
nvml-wrapper = { version = "0.10.0", optional = true }
strum = { version = "0.26", features = ["derive"], optional = true }
regex = { version = "1.11.1", optional = true }
postgres-types = { version = "0.2.8", features = ["derive"], optional = true }
usearch = { git = "https://github.com/Ngalstyan4/usearch.git", rev = "aa4f91d21230fd611b6c7741fa06be8c20acc9a9", optional = true }
actix-web = { version = "4.9.0", optional = true }
env_logger = { version = "0.11.5", optional = true }
Expand All @@ -55,13 +54,12 @@ glob = { version="0.3.1", optional=true }
reqwest = { version = "0.12.9", default-features = false, features = ["json", "blocking", "rustls-tls"], optional = true }

[features]
default = ["cli", "daemon", "http-server", "autotune", "pq", "external-index", "external-index-server", "external-index-status-server", "embeddings"]
default = ["cli", "daemon", "http-server", "autotune", "pq", "external-index-server", "external-index-status-server", "embeddings"]
daemon = ["dep:tokio-postgres"]
http-server = ["dep:deadpool-postgres", "dep:deadpool", "dep:bytes", "dep:utoipa", "dep:utoipa-swagger-ui", "dep:actix-web", "dep:tokio-postgres", "dep:env_logger", "dep:actix-web-httpauth", "dep:regex"]
autotune = []
pq = ["dep:gcp_auth", "dep:linfa", "dep:linfa-clustering", "dep:md5", "dep:rayon", "dep:reqwest", "dep:postgres", "dep:ndarray"]
cli = []
external-index = ["dep:postgres-types", "dep:usearch", "dep:postgres"]
external-index-server = ["dep:bitvec", "dep:rustls", "dep:rustls-pemfile", "dep:glob", "dep:usearch"]
external-index-status-server = ["dep:actix-web"]
embeddings = ["dep:bytes", "dep:sysinfo", "dep:tiktoken-rs", "dep:url", "dep:num_cpus", "dep:ort", "dep:tokenizers", "dep:image", "dep:nvml-wrapper", "dep:strum", "dep:regex", "dep:reqwest", "dep:ndarray"]
Expand Down
63 changes: 27 additions & 36 deletions lantern_cli/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,21 +14,31 @@ Run `cargo install --path lantern_cli` to install the binary

### Usage

Run `lantern-cli create-index --help` to show the cli options.
## Lantern External Index
Run `cargo run start-indexing-server --help` to show the cli options.

```bash
Usage: lantern-cli create-index --uri <URI> --table <TABLE> --column <COLUMN> -m <M> --efc <EFC> --ef <EF> -d <DIMS> --metric-kind <METRIC_KIND> --out <OUT> --import
```

### Example

```bash
lantern-cli create-index -u "postgresql://localhost/test" -t "small_world" -c "vec" -m 16 --ef 64 --efc 128 -d 3 --metric-kind cos --out /tmp/index.usearch --import
Usage: lantern-cli start-indexing-server [OPTIONS]

Options:
--host <HOST> Host to bind [default: 0.0.0.0]
--tmp-dir <TMP_DIR> Temp directory to save intermediate files [default: /tmp]
--port <PORT> Port to bind [default: 8998]
--status-port <STATUS_PORT> Status Server Port to bind [default: 8999]
--cert <CERT> SSL Certificate path
--key <KEY> SSL Certificate key path
-h, --help Print help
```

### Notes

The index should be created from the same database on which it will be loaded, so row tids will match later.
This will start the external indexing server, which is used when creating an index with `external=true`.
```sql
SET lantern.external_index_host='127.0.0.1';
SET lantern.external_index_port='8998';
SET lantern.external_index_secure=false;
CREATE INDEX ON test_table USING lantern_hnsw(v) WITH (external=true);
```

## Lantern Embeddings

Expand Down Expand Up @@ -122,7 +132,7 @@ To get full list of arguments use `bash lantern-cli autotune-index -h`
Lantern CLI can be used in daemon mode to continuously listen to a Postgres table and generate embeddings, external indexes or autotune jobs.

```bash
lantern-cli start-daemon --uri 'postgres://postgres@localhost:5432/postgres' --embedding-table embedding_jobs --autotune-table index_autotune_jobs --autotune-results-table index_parameter_experiment_results --external-index-table external_index_jobs --schema public --log-level debug
lantern-cli start-daemon --uri 'postgres://postgres@localhost:5432/postgres' --embedding-table embedding_jobs --autotune-table index_autotune_jobs --autotune-results-table index_parameter_experiment_results --schema public --log-level debug
```

This will set up a trigger on the specified table (`lantern_jobs`), and when a new row is inserted it will start embedding generation based on the row data.
Expand All @@ -133,12 +143,15 @@ The jobs table should have the following structure
-- Embedding Jobs Table should have the following structure:
CREATE TABLE "public"."embedding_jobs" (
"id" SERIAL PRIMARY KEY,
"database_id" text NOT NULL,
"db_connection" text NOT NULL,
"schema" text NOT NULL,
"schema" text NOT NULL DEFAULT 'public',
"table" text NOT NULL,
"runtime" text NOT NULL,
"pk" text NOT NULL DEFAULT 'id',
"label" text NULL,
"job_type" text DEFAULT 'embedding_generation',
"column_type" text DEFAULT 'REAL[]',
"runtime" text NOT NULL DEFAULT 'ort',
"runtime_params" jsonb,
"batch_size" int NULL,
"src_column" text NOT NULL,
"dst_column" text NOT NULL,
"embedding_model" text NOT NULL,
Expand All @@ -151,28 +164,6 @@ CREATE TABLE "public"."embedding_jobs" (
"init_failure_reason" text,
"init_progress" int2 DEFAULT 0
);
-- External Index Jobs Table should have the following structure:
CREATE TABLE "public"."external_index_jobs" (
"id" SERIAL PRIMARY KEY,
"database_id" text NOT NULL,
"db_connection" text NOT NULL,
"schema" text NOT NULL,
"table" text NOT NULL,
"column" text NOT NULL,
"index" text,
"operator" text NOT NULL,
"efc" INT NOT NULL,
"ef" INT NOT NULL,
"m" INT NOT NULL,
"created_at" timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
"updated_at" timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
"canceled_at" timestamp,
"started_at" timestamp,
"finished_at" timestamp,
"failed_at" timestamp,
"failure_reason" text,
"progress" INT2 DEFAULT 0
);
-- Autotune Jobs Table should have the following structure:
CREATE TABLE "public"."index_autotune_jobs" (
"id" SERIAL PRIMARY KEY,
Expand Down
3 changes: 0 additions & 3 deletions lantern_cli/src/cli.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,13 @@
use clap::{Parser, Subcommand};
use lantern_cli::daemon::cli::DaemonArgs;
use lantern_cli::embeddings::cli::{EmbeddingArgs, MeasureModelSpeedArgs, ShowModelsArgs};
use lantern_cli::external_index::cli::CreateIndexArgs;
use lantern_cli::external_index::cli::IndexServerArgs;
use lantern_cli::http_server::cli::HttpServerArgs;
use lantern_cli::index_autotune::cli::IndexAutotuneArgs;
use lantern_cli::pq::cli::PQArgs;

#[derive(Subcommand, Debug)]
pub enum Commands {
/// Create external index
CreateIndex(CreateIndexArgs),
/// Create embeddings
CreateEmbeddings(EmbeddingArgs),
/// Show embedding models
Expand Down
Loading

0 comments on commit 8c40d36

Please sign in to comment.