
Commit

evaluate working
nerdai committed Sep 23, 2024
1 parent 7a80e4c commit a95c4b7
Showing 5 changed files with 238 additions and 1 deletion.
Empty file.
85 changes: 85 additions & 0 deletions arc_finetuning_st/cli/command_line.py
@@ -0,0 +1,85 @@
import argparse
import asyncio
from os import listdir
from pathlib import Path
from typing import Any, List, cast

from llama_index.llms.openai import OpenAI

from arc_finetuning_st.cli.evaluation import batch_runner
from arc_finetuning_st.workflows.arc_task_solver import (
ARCTaskSolverWorkflow,
WorkflowOutput,
)


def handle_evaluate(
llm: str,
batch_size: int,
num_workers: int,
verbose: bool,
sleep: int,
**kwargs: Any,
) -> None:
data_path = Path(
Path(__file__).parents[2].absolute(), "data", "evaluation"
)
task_paths = [data_path / t for t in listdir(data_path)]
    openai_llm = OpenAI(model=llm)
    w = ARCTaskSolverWorkflow(llm=openai_llm, timeout=None)
results = asyncio.run(
batch_runner(
w,
            task_paths[:10],  # NOTE: only the first 10 tasks are evaluated
verbose=verbose,
batch_size=batch_size,
num_workers=num_workers,
sleep=sleep,
)
)
results = cast(List[WorkflowOutput], results)
num_solved = sum(el.passing for el in results)
print(
f"Solved: {num_solved}\nTotal Tasks:{len(results)}\nAverage Solve Rate: {float(num_solved) / len(results)}"
)


def main() -> None:
parser = argparse.ArgumentParser(description="arc-finetuning cli tool.")

# Subparsers
subparsers = parser.add_subparsers(
title="commands", dest="command", required=True
)

# evaluation command
evaluate_parser = subparsers.add_parser(
"evaluate",
help="Evaluation of ARC Task predictions with LLM and ARCTaskSolverWorkflow.",
)
evaluate_parser.add_argument(
"-m",
"--llm",
type=str,
default="gpt-4o",
help="The OpenAI LLM model to use with the Workflow.",
)
evaluate_parser.add_argument("-b", "--batch-size", type=int, default=5)
evaluate_parser.add_argument("-w", "--num-workers", type=int, default=3)
evaluate_parser.add_argument(
"-v", "--verbose", action=argparse.BooleanOptionalAction
)
evaluate_parser.add_argument("-s", "--sleep", type=int, default=10)
evaluate_parser.set_defaults(
func=lambda args: handle_evaluate(**vars(args))
)

# Parse the command-line arguments
args = parser.parse_args()

# Call the appropriate function based on the command
args.func(args)


if __name__ == "__main__":
main()
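The `evaluate` subcommand simply forwards its parsed flags into `handle_evaluate`. A minimal sketch of driving the same path from Python (assuming the package is installed and an OpenAI API key is available in the environment; the values mirror the parser defaults above):

from arc_finetuning_st.cli.command_line import handle_evaluate

# Roughly equivalent to: arc-finetuning-cli evaluate -m gpt-4o -b 5 -w 3 -s 10 -v
handle_evaluate(
    llm="gpt-4o",
    batch_size=5,
    num_workers=3,
    verbose=True,
    sleep=10,
)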
58 changes: 58 additions & 0 deletions arc_finetuning_st/cli/evaluation.py
@@ -0,0 +1,58 @@
import asyncio
from os import listdir
from pathlib import Path
from typing import Any, List, cast

from llama_index.core.async_utils import chunks
from llama_index.llms.openai import OpenAI

from arc_finetuning_st.workflows.arc_task_solver import (
ARCTaskSolverWorkflow,
WorkflowOutput,
)

DATA_PATH = Path(Path(__file__).parents[1].absolute(), "data", "evaluation")


async def batch_runner(
workflow: ARCTaskSolverWorkflow,
task_paths: List[Path],
batch_size: int = 5,
verbose: bool = False,
sleep: int = 10,
num_workers: int = 3,
) -> List[Any]:
output: List[Any] = []
sem = asyncio.Semaphore(num_workers)
    for task_chunk in chunks(task_paths, batch_size):
        batch_coros = (
            workflow.load_and_run_task(task_path=task_path, sem=sem)
            for task_path in task_chunk
            if task_path is not None
        )
        output_chunk = await asyncio.gather(*batch_coros)
output.extend(output_chunk)
if verbose:
print(
f"Completed {len(output)} out of {len(task_paths)} tasks",
flush=True,
)
await asyncio.sleep(sleep)
return output


async def main() -> None:
task_paths = [DATA_PATH / t for t in listdir(DATA_PATH)]
w = ARCTaskSolverWorkflow(
timeout=None, verbose=False, llm=OpenAI("gpt-4o")
)
results = await batch_runner(w, task_paths[:10], verbose=True)
results = cast(List[WorkflowOutput], results)
num_solved = sum(el.passing for el in results)
print(
f"Solved: {num_solved}\nTotal Tasks:{len(results)}\nAverage Solve Rate: {float(num_solved) / len(results)}"
)


if __name__ == "__main__":
asyncio.run(main())
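`batch_runner` passes a shared `asyncio.Semaphore` into each `load_and_run_task` call to cap in-flight LLM requests within a batch. That method belongs to `ARCTaskSolverWorkflow` and is not part of this diff; the following is only a sketch of the pattern it presumably follows (the function name and task-loading details are illustrative assumptions):

import asyncio
import json
from pathlib import Path
from typing import Any


async def load_and_run_task_sketch(
    workflow: Any, task_path: Path, sem: asyncio.Semaphore
) -> Any:
    # Hold the semaphore for the duration of the run so that at most
    # `num_workers` tasks hit the LLM concurrently within a batch.
    async with sem:
        task = json.loads(task_path.read_text())  # hypothetical task format
        return await workflow.run(task=task)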
91 changes: 91 additions & 0 deletions arc_finetuning_st/cli/finetune.py
@@ -0,0 +1,91 @@
"""Finetuning CLI helpers.

Steps covered by this module:
  1. prepare a JSONL file from the examples in finetuning_examples/
  2. submit a finetuning job, resuming from the last recorded job id if one exists
  3. check the status of the most recently submitted job
"""

from os import listdir
from pathlib import Path

from llama_index.finetuning import OpenAIFinetuneEngine

SINGLE_EXAMPLE_JSON_PATH = Path(
Path(__file__).parents[1].absolute(), "finetuning_examples"
)

FINETUNING_ASSETS_PATH = Path(
Path(__file__).parents[1].absolute(), "finetuning_assets"
)

FINETUNE_JSONL_FILENAME = "finetuning.jsonl"
FINETUNE_JOBS_FILENAME = "finetuning_jobs.txt"


def prepare_finetuning_jsonl_file(
json_path: Path = SINGLE_EXAMPLE_JSON_PATH,
) -> None:
"""Read all json files from data path and write a jsonl file."""
with open(
FINETUNING_ASSETS_PATH / FINETUNE_JSONL_FILENAME, "w"
) as jsonl_out:
        for json_name in listdir(json_path):
            # each example file is assumed to hold a single JSON record
            with open(json_path / json_name) as f:
for line in f:
jsonl_out.write(line)
jsonl_out.write("\n")


def submit_finetune_job() -> None:
"""Submit finetuning job."""

try:
with open(FINETUNING_ASSETS_PATH / FINETUNE_JOBS_FILENAME) as f:
lines = f.read().splitlines()
current_job_id = lines[-1]
except FileNotFoundError:
# no previous finetune model
current_job_id = None

finetune_engine = OpenAIFinetuneEngine(
"gpt-4o-2024-08-06",
(FINETUNING_ASSETS_PATH / FINETUNE_JSONL_FILENAME).as_posix(),
start_job_id=current_job_id,
validate_json=False,
)
finetune_engine.finetune()

    with open(FINETUNING_ASSETS_PATH / FINETUNE_JOBS_FILENAME, "a+") as f:
        # record the submitted job id (read from the engine's internal start-job handle)
        f.write(finetune_engine._start_job.id)
        f.write("\n")

print(finetune_engine.get_current_job())


def check_latest_job_status() -> None:
"""Check on status of most recent submitted finetuning job."""
try:
with open(FINETUNING_ASSETS_PATH / FINETUNE_JOBS_FILENAME) as f:
lines = f.read().splitlines()
current_job_id = lines[-1]
except FileNotFoundError:
raise ValueError(
"No finetuning_jobs.txt file exists. You likely haven't submitted a job yet."
)

finetune_engine = OpenAIFinetuneEngine(
"gpt-4o-2024-08-06",
(FINETUNING_ASSETS_PATH / FINETUNE_JSONL_FILENAME).as_posix(),
start_job_id=current_job_id,
validate_json=False,
)

print(finetune_engine.get_current_job())


if __name__ == "__main__":
FINETUNING_ASSETS_PATH.mkdir(exist_ok=True, parents=True)
prepare_finetuning_jsonl_file()
# submit_finetune_job()
check_latest_job_status()
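`prepare_finetuning_jsonl_file` assumes every file under `finetuning_examples/` already holds a single-line JSON record (presumably in OpenAI's chat fine-tuning format) and simply concatenates them. A quick sanity check on the generated file before submitting a job, using only the constants defined above, might look like this:

import json

from arc_finetuning_st.cli.finetune import (
    FINETUNE_JSONL_FILENAME,
    FINETUNING_ASSETS_PATH,
)

# Every non-empty line of finetuning.jsonl must parse as standalone JSON,
# otherwise the upload is likely to be rejected.
with open(FINETUNING_ASSETS_PATH / FINETUNE_JSONL_FILENAME) as f:
    for lineno, line in enumerate(f, start=1):
        if line.strip():
            json.loads(line)  # raises json.JSONDecodeError on a bad record
print("finetuning.jsonl parsed cleanly")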
5 changes: 4 additions & 1 deletion pyproject.toml
@@ -12,12 +12,15 @@ readme = "README.md"
[tool.poetry.dependencies]
python = "^3.10"
llama-index-core = "^0.11.11"
llama-index-finetuning = "^0.2.1"
llama-index-llms-openai = "^0.2.7"
llama-index-program-openai = "^0.2.0"
streamlit = "^1.38.0"
plotly = "^5.24.1"
pandas = "^2.2.2"
llama-index-finetuning = "^0.2.1"

[tool.poetry.group.dev.dependencies]
jupyterlab = "^4.2.5"

[tool.poetry.scripts]
arc-finetuning-cli = 'arc_finetuning_st.cli.command_line:main'
