Merge pull request #1 from lion-agi/initial-set-up
Initial set up
ohdearquant authored Sep 26, 2024
2 parents c2873b8 + a4b00f2 commit fa9b117
Showing 13 changed files with 1,073 additions and 0 deletions.
22 changes: 22 additions & 0 deletions .github/dependabot.yml
@@ -0,0 +1,22 @@
version: 2
updates:
  # Poetry
  - package-ecosystem: "pip"  # Dependabot handles Poetry projects via the "pip" ecosystem
    directory: "/"
    schedule:
      interval: "weekly"
    allow:
      - dependency-type: "all"
    versioning-strategy: "lockfile-only"
    ignore:
      - dependency-name: "*"
        update-types: ["version-update:semver-patch"]

  # GitHub Actions
  - package-ecosystem: "github-actions"
    directory: "/"
    schedule:
      interval: "weekly"

# filepath: .github/dependabot.yml
40 changes: 40 additions & 0 deletions .github/workflows/codeql-analysis.yml
@@ -0,0 +1,40 @@
name: "CodeQL"

on:
push:
branches: [ main, develop ]
pull_request:
branches: [ main, develop ]
schedule:
- cron: '0 0 * * 0'

jobs:
analyze:
name: Analyze
runs-on: ubuntu-latest
permissions:
actions: read
contents: read
security-events: write

strategy:
fail-fast: false
matrix:
language: [ 'python' ]

steps:
- name: Checkout repository
uses: actions/checkout@v4

- name: Initialize CodeQL
uses: github/codeql-action/init@v3
with:
languages: ${{ matrix.language }}

- name: Autobuild
uses: github/codeql-action/autobuild@v3

- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v3

# filepath: .github/workflows/codeql-analysis.yml
53 changes: 53 additions & 0 deletions .github/workflows/release.yml
@@ -0,0 +1,53 @@
name: Release

on:
  release:
    types: [published]

permissions:
  contents: read

jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ['3.10', '3.11', '3.12']
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install Poetry
        uses: snok/install-poetry@v1
        with:
          version: 1.8.2
      - name: Install dependencies
        run: poetry install --no-interaction --no-root
      - name: Run tests
        run: poetry run pytest

  deploy:
    needs: test
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"
      - name: Install Poetry
        uses: snok/install-poetry@v1
        with:
          version: 1.8.2
      - name: Install dependencies
        run: poetry install --no-interaction --no-root
      - name: Build package
        run: poetry build
      - name: Publish package to PyPI
        env:
          PYPI_TOKEN: ${{ secrets.PYPI_API_KEY }}
        run: poetry config pypi-token.pypi $PYPI_TOKEN && poetry publish

# filepath: .github/workflows/release.yml
5 changes: 5 additions & 0 deletions lion_service/__init__.py
@@ -0,0 +1,5 @@
from .version import __version__
from .service import Service, register_service
from .rate_limiter import RateLimiter

__all__ = ["Service", "register_service", "RateLimiter", "__version__"]
9 changes: 9 additions & 0 deletions lion_service/complete_request_info.py
@@ -0,0 +1,9 @@
from pydantic import BaseModel, Field


class CompleteRequestInfo(BaseModel):
    timestamp: float = Field(description="HTTP response generated time")


class CompleteRequestTokenInfo(CompleteRequestInfo):
    token_usage: int = Field(description="Number of tokens used in the request")
97 changes: 97 additions & 0 deletions lion_service/rate_limiter.py
@@ -0,0 +1,97 @@
from collections import deque
from datetime import datetime, timezone

from pydantic import BaseModel, Field

from .complete_request_info import CompleteRequestInfo, CompleteRequestTokenInfo


class RateLimiter(BaseModel):
    limit_tokens: int | None = Field(default=None)

    limit_requests: int | None = Field(default=None)

    remaining_tokens: int | None = Field(default=None)

    remaining_requests: int | None = Field(default=None)

    last_check_timestamp: float | None = Field(
        default=None,
        description="Timestamp of the last token/request availability check.",
    )

    unreleased_requests: deque = Field(
        default_factory=deque,
        description="Completed request info retained until its quota is replenished.",
        exclude=True,
    )

    def append_complete_request_token_info(self, info: CompleteRequestInfo):
        if not self.limit_tokens and not self.limit_requests:
            # No limits configured; nothing to track.
            return

        self.unreleased_requests.append(info)
        if self.limit_tokens and isinstance(info, CompleteRequestTokenInfo):
            # Only responses that report token usage count against the token limit.
            if self.remaining_tokens is not None:
                self.remaining_tokens -= info.token_usage
            else:
                self.remaining_tokens = self.limit_tokens - info.token_usage

        if self.limit_requests:
            if self.remaining_requests is not None:
                self.remaining_requests -= 1
            else:
                self.remaining_requests = self.limit_requests - 1

    def release_tokens(self):
        self.last_check_timestamp = datetime.now(timezone.utc).timestamp()
        while self.unreleased_requests:
            if (
                datetime.now(timezone.utc).timestamp()
                - self.unreleased_requests[0].timestamp
                > 60
            ):
                # Requests older than one minute no longer count against the limits.
                release_info = self.unreleased_requests.popleft()
                if (
                    isinstance(release_info, CompleteRequestTokenInfo)
                    and self.remaining_tokens is not None
                ):
                    self.remaining_tokens += release_info.token_usage
                if self.remaining_requests is not None:
                    self.remaining_requests += 1
            else:
                break

    def update_rate_limit(
        self, request_datetime_header, total_token_usage: int | None = None
    ):
        # Track token usage when the response reports it; otherwise only count
        # the request itself.
        # RFC 1123 date format used in HTTP response headers.
        date_format = "%a, %d %b %Y %H:%M:%S GMT"
        dt = datetime.strptime(request_datetime_header, date_format)
        dt = dt.replace(tzinfo=timezone.utc)
        request_timestamp = dt.timestamp()

        if total_token_usage:
            complete_request_info = CompleteRequestTokenInfo(
                timestamp=request_timestamp, token_usage=total_token_usage
            )
        else:
            complete_request_info = CompleteRequestInfo(timestamp=request_timestamp)
        self.append_complete_request_token_info(complete_request_info)

    def check_availability(
        self, request_token_len: int = 0, estimated_output_len: int = 0
    ):
        if self.remaining_tokens is not None:
            if request_token_len + estimated_output_len > self.remaining_tokens:
                return False
        if self.remaining_requests is not None:
            if self.remaining_requests <= 0:
                return False
        return True


class RateLimitError(Exception):
    def __init__(self, message, input_token_len, estimated_output_len):
        super().__init__(message)
        self.requested_tokens = input_token_len + estimated_output_len
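
For context, a minimal usage sketch of RateLimiter (not part of the diff), assuming the package is installed; the limits, header value, and token counts below are illustrative:

from lion_service import RateLimiter

limiter = RateLimiter(limit_tokens=10_000, limit_requests=60)

# Record a completed request from its HTTP Date header and reported token usage.
limiter.update_rate_limit("Thu, 26 Sep 2024 12:00:00 GMT", total_token_usage=512)

# Before the next call, check whether the estimated request still fits the limits.
if limiter.check_availability(request_token_len=300, estimated_output_len=200):
    pass  # proceed with the request
else:
    limiter.release_tokens()  # replenish quota from requests older than 60 seconds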
37 changes: 37 additions & 0 deletions lion_service/service.py
@@ -0,0 +1,37 @@
class Service:
    pass


def register_service(cls):
    original_init = cls.__init__

    def wrapped_init(self, *args, **kwargs):
        original_init(self, *args, **kwargs)
        service_name = self.name
        ServiceSetting().add_service(self, service_name)

    cls.__init__ = wrapped_init
    return cls


class ServiceSetting:
    _instance = None

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self):
        # Guard so repeated instantiation of the singleton does not reset the registry.
        if not hasattr(self, "services"):
            self.services = {}

    def add_service(self, service: Service, name: str | None = None):
        if name:
            if self.services.get(name):
                raise ValueError(
                    "Invalid name: a service with this name already exists. "
                    "Please choose a different name."
                )
            self.services[name] = service
        else:
            name = service.__class__.__name__ + "_" + str(len(self.services))
            self.services[name] = service
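
A rough illustration of register_service (not part of the diff); EchoService is a hypothetical class, and any decorated class is expected to set self.name in __init__, since the wrapped initializer reads it:

from lion_service import Service, register_service


@register_service
class EchoService(Service):  # hypothetical example service
    def __init__(self, name: str):
        self.name = name  # read by wrapped_init when registering the instance


svc = EchoService(name="echo")
# The ServiceSetting singleton now holds the instance under the key "echo".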
63 changes: 63 additions & 0 deletions lion_service/service_util.py
@@ -0,0 +1,63 @@
import asyncio
from functools import wraps

from .rate_limiter import RateLimitError


def invoke_retry(max_retries: int = 3, base_delay: int = 1, max_delay: int = 60):
    def decorator(func):
        @wraps(func)
        async def wrapper(request_model, *args, **kwargs):
            if max_retries <= 0:
                raise ValueError(
                    "Invalid max number of retries. It must be a positive integer."
                )

            for retry in range(max_retries + 1):
                try:
                    response_body = await func(request_model, *args, **kwargs)
                    return response_body
                except Exception as e:
                    # Last attempt used
                    if retry == max_retries:
                        raise e

                    # RateLimitError raised by the model's local rate limiter
                    if isinstance(e, RateLimitError):
                        if e.requested_tokens > request_model.rate_limiter.limit_tokens:
                            raise ValueError(
                                "Requested tokens exceed the model's token limit. "
                                "Please modify the input, adjust the expected output tokens, or increase the token limit. "
                                f"The current token limit is {request_model.rate_limiter.limit_tokens} tokens."
                            )

                        # Wait until enough quota has been released to retry.
                        while request_model.rate_limiter.unreleased_requests:
                            await asyncio.sleep(2)
                            request_model.rate_limiter.release_tokens()
                            if request_model.rate_limiter.check_availability(
                                request_token_len=e.requested_tokens
                            ):
                                break

                    elif error_code := getattr(
                        e, "status", None
                    ):  # HTTP request errors
                        if error_code == 429 and "exceeded your current quota" in str(
                            e
                        ):  # rate limit error: account quota reached
                            raise e
                        if error_code == 429 or error_code >= 500:  # server error
                            if retry_after := getattr(e, "headers", {}).get(
                                "Retry-After"
                            ):
                                if retry_after.isdigit():
                                    await asyncio.sleep(int(retry_after))
                                    continue

                            # Exponential backoff capped at max_delay
                            wait_time = min(base_delay * (2**retry), max_delay)
                            await asyncio.sleep(wait_time)
                    else:
                        raise e

        return wrapper

    return decorator
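
A sketch of invoke_retry wrapping an async call (not part of the diff); DummyRequestModel is hypothetical, standing in for a request model that carries a rate_limiter attribute, which the decorator consults on RateLimitError:

import asyncio

from lion_service import RateLimiter
from lion_service.service_util import invoke_retry


class DummyRequestModel:  # hypothetical stand-in for a provider request model
    rate_limiter = RateLimiter(limit_tokens=10_000)


@invoke_retry(max_retries=3, base_delay=1, max_delay=60)
async def invoke(request_model):
    return {"ok": True}  # placeholder for the actual provider call


print(asyncio.run(invoke(DummyRequestModel())))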
49 changes: 49 additions & 0 deletions lion_service/token_calculator.py
@@ -0,0 +1,49 @@
import tiktoken
from abc import abstractmethod

from pydantic import BaseModel, Field, field_validator


class TokenCalculator(BaseModel):

    @abstractmethod
    def calculate(self, *args, **kwargs): ...


class TiktokenCalculator(TokenCalculator):
    encoding_name: str = Field(
        description="Encoding for converting text to tokens. "
        "Input an encoding name or a specific OpenAI model name.",
        examples=["o200k_base", "gpt-4o"],
    )

    @field_validator("encoding_name")
    @classmethod
    def get_encoding_name(cls, value: str) -> str:
        try:
            # Resolve an OpenAI model name to its encoding.
            enc = tiktoken.encoding_for_model(value)
            return enc.name
        except Exception:
            try:
                # Fall back to treating the value as an encoding name.
                tiktoken.get_encoding(value)
                return value
            except Exception:
                # Default encoding when the value is neither a known model nor encoding.
                return "o200k_base"

    def encode(self, text: str) -> list[int]:
        enc = tiktoken.get_encoding(self.encoding_name)
        return enc.encode(text=text)

    def calculate(self, text: str) -> int:
        enc_text = self.encode(text)
        return len(enc_text)

    def tokenize(
        self, text: str, decode_byte_str: bool = False, decoder: str = "utf-8"
    ):
        enc = tiktoken.get_encoding(self.encoding_name)
        enc_text = self.encode(text)
        tokens = [enc.decode_single_token_bytes(token_id) for token_id in enc_text]
        if decode_byte_str:
            tokens = [token.decode(decoder) for token in tokens]
        return tokens
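
A small usage sketch of TiktokenCalculator (not part of the diff), assuming tiktoken is installed and the o200k_base encoding is available locally:

from lion_service.token_calculator import TiktokenCalculator

calc = TiktokenCalculator(encoding_name="gpt-4o")  # validator resolves the model name to its encoding
print(calc.calculate("hello world"))  # token count, e.g. 2
print(calc.tokenize("hello world", decode_byte_str=True))  # e.g. ['hello', ' world']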
1 change: 1 addition & 0 deletions lion_service/version.py
@@ -0,0 +1 @@
__version__ = "0.1.0"
