-
Notifications
You must be signed in to change notification settings - Fork 48
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
2564c33
commit aefb05d
Showing
19 changed files
with
2,264 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
venv/ | ||
__pycache__/ | ||
blserver.egg-info/ | ||
build/ | ||
example.toml | ||
example.yaml |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
__pycache__/ | ||
*.pyc | ||
config.toml | ||
venv/ | ||
blserver.egg-info/ | ||
build/ | ||
*.json | ||
milvus.yaml | ||
test.py |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
# Container image for blserver, built on the PyTorch CUDA runtime image.
FROM pytorch/pytorch:2.5.1-cuda12.4-cudnn9-runtime

WORKDIR /app

COPY . /app

RUN pip install --no-cache-dir -r requirements.txt

# Seed a default configuration from the bundled example.
# NOTE(review): this requires example.toml to be present in the build
# context; confirm it is not excluded by an ignore file.
RUN cp example.toml config.toml

ENV CONFIG_FILE=/app/config.toml

ENV PORT=5000

# The exec (JSON array) form of CMD performs no variable substitution, so
# "$PORT" and "$CONFIG_FILE" would reach gunicorn as literal text. Run through
# a shell so they are expanded; `exec` replaces the shell so that gunicorn
# receives container signals directly.
CMD ["sh", "-c", "exec gunicorn -w 4 -b \"0.0.0.0:${PORT}\" \"blserver:create_app('${CONFIG_FILE}')\""]
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# Binlex Server | ||
|
||
|
||
## Installing | ||
```bash | ||
cp example.toml config.toml # Config for blserver | ||
cp example.yaml milvus.yaml # Config for Milvus | ||
docker-compose up -d | ||
``` | ||
|
||
### Setting up MinIO | ||
|
||
Milvus comes with MinIO, and `blserver` leverages it for object storage. | ||
|
||
Navigate to `http://127.0.0.1:9001`, and log in with the MinIO credentials you set in the `docker-compose.yml` file. | ||
|
||
Once logged in, create an access key; MinIO will then provide you with an access key and a secret key. | ||
|
||
This access key and secret key need to be set for MinIO in your `config.toml` configuration file for `blserver`. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,238 @@ | ||
#!/usr/bin/env python | ||
|
||
import json | ||
import argparse | ||
import tomllib | ||
from flask import Flask, request, Response | ||
from flask_restx import Resource, Api, fields | ||
from libblserver import BinlexGNN, BinlexVectorEmbedding, BinlexMinio, BinlexMilvus | ||
|
||
__author__ = 'c3rb3ru5d3d53c' | ||
__version__ = '2.0.0' | ||
|
||
def read_config(file_path: str) -> dict: | ||
try: | ||
with open(file_path, "rb") as file: | ||
return tomllib.load(file) | ||
except FileNotFoundError: | ||
raise FileNotFoundError(f"The file at {file_path} does not exist.") | ||
except tomllib.TOMLDecodeError as e: | ||
raise ValueError(f"Error decoding TOML: {e}") | ||
|
||
def is_valid_data(data: dict) -> bool:
    """Return True when ``data`` is a mapping whose ``type`` is ``"function"``.

    Decoded JSON may be any JSON value (list, string, number, null), so
    anything that is not a dict is rejected instead of raising ``TypeError``.

    Args:
        data: The decoded JSON payload to check.

    Returns:
        True if ``data`` is a dict with ``data['type'] == 'function'``,
        otherwise False.
    """
    if not isinstance(data, dict):
        return False
    return data.get('type') == 'function'
|
||
def create_app(config: str) -> Flask:
    """Build the Binlex Server Flask application.

    Reads the TOML configuration at ``config``, creates the MinIO and Milvus
    clients, and registers the embedding resources (index, search, inference
    and database list) on a flask-restx ``Api`` with docs at ``/swagger``.

    Args:
        config: Path to the TOML configuration file.

    Returns:
        The configured Flask application.

    Raises:
        FileNotFoundError: Propagated from ``read_config`` when the file is
            missing.
        ValueError: Propagated from ``read_config`` when the file is not
            valid TOML.
    """
    server_config = read_config(config)

    app = Flask(__name__)

    print('connecting to minio...')
    minio_client = BinlexMinio(server_config)
    print('connecting to milvus...')
    milvus_client = BinlexMilvus(server_config)

    def require_api_key(func):
        """Decorator to require an API key for endpoint access."""
        # NOTE(review): the wrapper is not decorated with functools.wraps, so
        # any attributes the inner api.expect/api.response/api.doc decorators
        # attach to `func` are not visible on the wrapper. Confirm whether the
        # Swagger docs and `validate=True` are meant to take effect before
        # changing this: enabling validation would alter request handling
        # (the search handler expects a bare JSON list, not the documented
        # object with a `vector` field).
        def wrapper(*args, **kwargs):
            api_key = request.headers.get('API-Key')
            if api_key not in server_config['blserver']['authentication']['api_keys']:
                return Response(
                    response=json.dumps({'error': 'Unauthorized'}),
                    status=401,
                    mimetype='application/json'
                )
            return func(*args, **kwargs)
        return wrapper

    def compute_embedding(data: dict):
        """Run the GNN over ``data`` using the configured Milvus dimensions."""
        dimensions = server_config['milvus']['dimensions']
        gnn = BinlexGNN(
            json.dumps(data),
            block_pca_dim=dimensions['input'],
            gnn_hidden_dim=dimensions['hidden'],
            gnn_output_dim=dimensions['output'],
        )
        return gnn.to_embedding()

    api = Api(
        app,
        title='Binlex Server',
        version=f'v{__version__}',
        description='A Binlex Server',
        doc='/swagger'
    )

    # Request payload for the index and inference endpoints.
    embedding_input_model = api.model(
        'EmbeddingInput',
        {
            'type': fields.String(
                required=True,
                description='The type of data to process, must be "function"',
                example='function'
            )
        }
    )

    # Documented request payload for the search endpoint.
    embedding_search_model = api.model(
        'EmbeddingSearchInput',
        {
            'vector': fields.List(
                fields.Float,
                required=True,
                description='A list of float values representing the embedding vector to search',
                example=[0.1, 0.2, 0.3, 0.4]
            )
        }
    )

    # Documented success payload for the index and inference endpoints.
    embedding_response_model = api.model(
        'EmbeddingResponse',
        {
            'vector': fields.List(
                fields.Float,
                description='The resulting embedding vector',
                example=[0.1, 0.2, 0.3]
            ),
            'data': fields.Raw(
                description='Original data associated with the embedding',
                example={"key": "value"}
            )
        }
    )

    # Shape of every error body returned by the handlers below.
    error_response_model = api.model(
        'ErrorResponse',
        {
            'error': fields.String(
                description='Error message explaining the issue',
                example='Invalid input: Missing "type" field'
            )
        }
    )

    # NOTE(review): the handlers return strings already serialized with
    # json.dumps; confirm how flask-restx marshals a str payload (it may be
    # JSON-encoded a second time) before changing the wire format.

    @api.route('/embeddings/index/<string:database>')
    class BinlexServerEmbeddingsInsert(Resource):
        @require_api_key
        @api.expect(embedding_input_model, validate=True)
        @api.response(200, 'Success', embedding_response_model)
        @api.response(400, 'Invalid Input', error_response_model)
        @api.response(415, 'Unsupported Media Type', error_response_model)
        @api.response(500, 'Internal Server Error', error_response_model)
        @api.doc(description='Insert Embeddings')
        def post(self, database):
            """Embed the posted function and index it in ``database``."""
            try:
                data = json.loads(request.data)

                if not is_valid_data(data):
                    return json.dumps({'error': 'Invalid JSON data'}), 400

                embedding = compute_embedding(data)

                milvus_client.index_vector(
                    minio_client=minio_client,
                    database=database,
                    collection_name='functions',
                    vector=embedding.vector,
                    data=embedding.data
                )

                return json.dumps(embedding.vector), 200
            except json.JSONDecodeError:
                # Malformed request bodies are a client error, matching the
                # search endpoint and the documented 400 response.
                return json.dumps({'error': 'Invalid JSON input'}), 400
            except Exception as e:
                return json.dumps({'error': str(e)}), 500

    @api.route('/embeddings/search/<string:database>')
    class BinlexServerEmbeddingsSearch(Resource):
        @require_api_key
        @api.expect(embedding_search_model, validate=True)
        @api.response(200, 'Success', fields.List(
            fields.Raw(
                description='List of search results with similarity scores'
            )
        ))
        @api.response(400, 'Invalid Input', error_response_model)
        @api.response(500, 'Internal Server Error', error_response_model)
        @api.doc(description='Search Embeddings')
        def post(self, database):
            """Search ``database`` for vectors similar to the posted one."""
            try:
                request_data = json.loads(request.data)

                # The body must be a bare JSON list of numbers.
                if not isinstance(request_data, list) or not all(isinstance(x, (int, float)) for x in request_data):
                    return json.dumps({'error': 'expected a list of float values'}), 400

                top_k = server_config['blserver']['similarity']['top_k']
                similarity_threshold = server_config['blserver']['similarity']['threshold']

                results = milvus_client.search_vector(
                    minio_client=minio_client,
                    database=database,
                    collection_name='functions',
                    float_vector=request_data,
                    top_k=top_k,
                    similarity_threshold=similarity_threshold
                )

                return json.dumps(results), 200
            except json.JSONDecodeError:
                return json.dumps({'error': 'Invalid JSON input'}), 400
            except Exception as e:
                return json.dumps({'error': str(e)}), 500

    @api.route('/embeddings/inference')
    class BinlexServerEmbeddingsInference(Resource):
        @require_api_key
        @api.expect(embedding_input_model, validate=True)
        @api.response(200, 'Success', embedding_response_model)
        @api.response(400, 'Invalid Input', error_response_model)
        @api.response(415, 'Unsupported Media Type', error_response_model)
        @api.response(500, 'Internal Server Error', error_response_model)
        @api.doc(description='Embedding Inference')
        def post(self):
            """Embed the posted function and return the vector without indexing it."""
            try:
                request_data = json.loads(request.data)

                if not is_valid_data(request_data):
                    return json.dumps({'error': 'invalid or unsupported input data'}), 400

                embedding = compute_embedding(request_data)

                return json.dumps(embedding.vector), 200
            except json.JSONDecodeError:
                # Malformed request bodies are a client error, matching the
                # search endpoint and the documented 400 response.
                return json.dumps({'error': 'Invalid JSON input'}), 400
            except Exception as e:
                return json.dumps({'error': str(e)}), 500

    # Named distinctly from the inference resource above; the two classes
    # previously shared one class name.
    @api.route('/embeddings/database/list')
    class BinlexServerEmbeddingsDatabaseList(Resource):
        @require_api_key
        @api.doc(description='List Databases')
        def get(self):
            """Return the databases reported by the Milvus client."""
            try:
                databases = milvus_client.list_databases()
                return json.dumps(databases), 200
            except Exception as e:
                return json.dumps({'error': str(e)}), 500

    print('server started')

    return app
|
||
if __name__ == '__main__':
    # Command-line entry point: the only option is the path to the TOML config.
    cli = argparse.ArgumentParser(description='Binlex Server')
    cli.add_argument('--config', required=True, help='Configuration File Path')
    options = cli.parse_args()

    app = create_app(options.config)

    # Host, port and debug flag for the development server are read from the
    # [blserver] table of the same configuration file.
    config = read_config(options.config)
    run_options = {
        'host': config['blserver']['host'],
        'port': config['blserver']['port'],
        'debug': config['blserver']['debug'],
    }
    app.run(**run_options)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
version: '3.5' | ||
|
||
services: | ||
etcd: | ||
container_name: milvus-etcd | ||
image: quay.io/coreos/etcd:v3.5.14 | ||
environment: | ||
- ETCD_AUTO_COMPACTION_MODE=revision | ||
- ETCD_AUTO_COMPACTION_RETENTION=1000 | ||
- ETCD_QUOTA_BACKEND_BYTES=4294967296 | ||
- ETCD_SNAPSHOT_COUNT=50000 | ||
volumes: | ||
- ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/etcd:/etcd | ||
command: etcd -advertise-client-urls=http://127.0.0.1:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd | ||
healthcheck: | ||
test: ["CMD", "etcdctl", "endpoint", "health"] | ||
interval: 30s | ||
timeout: 20s | ||
retries: 3 | ||
|
||
minio: | ||
container_name: milvus-minio | ||
image: minio/minio:RELEASE.2023-03-20T20-16-18Z | ||
environment: | ||
MINIO_ACCESS_KEY: minioadmin | ||
MINIO_SECRET_KEY: minioadmin | ||
ports: | ||
- "9001:9001" | ||
- "9000:9000" | ||
volumes: | ||
- ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/minio:/minio_data | ||
command: minio server /minio_data --console-address ":9001" | ||
healthcheck: | ||
test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"] | ||
interval: 30s | ||
timeout: 20s | ||
retries: 3 | ||
|
||
milvus: | ||
container_name: milvus-standalone | ||
image: milvusdb/milvus:v2.5.1 | ||
command: ["milvus", "run", "standalone"] | ||
security_opt: | ||
- seccomp:unconfined | ||
environment: | ||
ETCD_ENDPOINTS: etcd:2379 | ||
MINIO_ADDRESS: minio:9000 | ||
volumes: | ||
- ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/milvus:/var/lib/milvus | ||
- ${DOCKER_VOLUME_DIRECTORY:-.}/milvus.yaml:/milvus/configs/milvus.yaml | ||
healthcheck: | ||
test: ["CMD", "curl", "-f", "http://localhost:9091/healthz"] | ||
interval: 30s | ||
start_period: 90s | ||
timeout: 20s | ||
retries: 3 | ||
ports: | ||
- "19530:19530" | ||
- "9091:9091" | ||
depends_on: | ||
- "etcd" | ||
- "minio" | ||
|
||
blserver: | ||
container_name: blserver | ||
build: | ||
context: . | ||
dockerfile: Dockerfile | ||
environment: | ||
CONFIG_FILE: /app/config.toml | ||
PORT: 5000 | ||
volumes: | ||
- ${DOCKER_VOLUME_DIRECTORY:-.}/config.toml:/app/config.toml | ||
command: ["gunicorn", "-w", "4", "-b", "0.0.0.0:5000", "blserver:create_app(config='/app/config.toml')"] | ||
ports: | ||
- "5000:5000" | ||
depends_on: | ||
- "milvus" | ||
- "minio" | ||
|
||
networks: | ||
default: | ||
name: milvus |
Oops, something went wrong.