Skip to content

Commit

Permalink
change to submit job jsonl instead of txt
Browse files Browse the repository at this point in the history
  • Loading branch information
nerdai committed Sep 23, 2024
1 parent 02941fe commit c8902bc
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 35 deletions.
53 changes: 51 additions & 2 deletions arc_finetuning_st/cli/command_line.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import argparse
import asyncio
import json
from os import listdir
from pathlib import Path
from typing import Any, List, Optional, cast
Expand All @@ -8,6 +9,8 @@

from arc_finetuning_st.cli.evaluation import batch_runner
from arc_finetuning_st.cli.finetune import (
FINETUNE_JOBS_FILENAME,
check_job_status,
prepare_finetuning_jsonl_file,
submit_finetune_job,
)
Expand Down Expand Up @@ -56,19 +59,62 @@ def handle_evaluate(


def handle_finetune_job_submit(
    llm: str,
    start_job_id: Optional[str],
    continue_latest: bool = False,
    **kwargs: Any,
) -> None:
    """Prepare the finetuning JSONL file and submit a finetuning job.

    Args:
        llm: Name of the model to finetune. Overridden by the most recent
            record in the jobs file when `continue_latest` is set.
        start_job_id: Id of a previously started job to continue from, or
            None. Overridden the same way when `continue_latest` is set.
        continue_latest: If truthy, take `start_job_id` and `llm` from the
            last record of the finetuning jobs file.
        **kwargs: Ignored; absorbs the extra argparse namespace entries that
            arrive via `handle_finetune_job_submit(**vars(args))`.

    Raises:
        ValueError: If `continue_latest` is set and the jobs file is missing,
            holds no records, or its last record is not valid JSON.
    """
    prepare_finetuning_jsonl_file(
        json_path=SINGLE_EXAMPLE_JSON_PATH, assets_path=FINETUNING_ASSETS_PATH
    )
    if continue_latest:
        try:
            with open(FINETUNING_ASSETS_PATH / FINETUNE_JOBS_FILENAME) as f:
                # One JSON record per line; skip blank lines so a trailing
                # newline (or an empty file) never yields an empty "record".
                records = [
                    line for line in f.read().splitlines() if line.strip()
                ]
        except FileNotFoundError as err:
            # no previous finetune model
            raise ValueError(
                "Missing `finetuning_jobs.jsonl` file. Have you submitted a prior job?"
            ) from err
        if not records:
            raise ValueError(
                "`finetuning_jobs.jsonl` file is empty. Have you submitted a prior job?"
            )
        try:
            metadata = json.loads(records[-1])
        except json.JSONDecodeError as err:
            raise ValueError(
                "Last record in `finetuning_jobs.jsonl` is not valid JSON."
            ) from err
        start_job_id = metadata["start_job_id"]
        llm = metadata["model"]

    submit_finetune_job(
        llm=llm,
        start_job_id=start_job_id,
        json_path=SINGLE_EXAMPLE_JSON_PATH,
        assets_path=FINETUNING_ASSETS_PATH,
    )


def handle_check_finetune_job(
    start_job_id: Optional[str], llm: Optional[str], use_latest: bool
) -> None:
    """Check the status of a finetuning job via `check_job_status`.

    Args:
        start_job_id: Id of the job to check. Required unless `use_latest`.
        llm: Model name the job was submitted with. Required unless
            `use_latest`.
        use_latest: If truthy, ignore the two arguments above and take both
            values from the last record of the finetuning jobs file.

    Raises:
        ValueError: If `use_latest` is set and the jobs file is missing or
            holds no records, or if `use_latest` is not set and either
            `start_job_id` or `llm` is None.
    """
    if use_latest:
        try:
            with open(FINETUNING_ASSETS_PATH / FINETUNE_JOBS_FILENAME) as f:
                # The jobs file is JSON Lines: one record is appended per
                # submitted job. Parsing the whole file with `json.load`
                # fails as soon as it holds two records, so read line by
                # line and decode only the last non-blank one.
                records = [
                    line for line in f.read().splitlines() if line.strip()
                ]
        except FileNotFoundError as err:
            raise ValueError(
                f"No {FINETUNE_JOBS_FILENAME} file exists. "
                "You likely haven't submitted a job yet."
            ) from err
        if not records:
            raise ValueError(
                f"{FINETUNE_JOBS_FILENAME} is empty. "
                "You likely haven't submitted a job yet."
            )
        job_metadata = json.loads(records[-1])
        start_job_id = job_metadata["start_job_id"]
        llm = job_metadata["model"]
    elif start_job_id is None or llm is None:
        raise ValueError(
            "If not `use_latest` then must provide `start_job_id` and `llm`."
        )

    # make type checking happy
    if start_job_id and llm:
        check_job_status(
            start_job_id=start_job_id,
            llm=llm,
            assets_path=FINETUNING_ASSETS_PATH,
        )


def main() -> None:
parser = argparse.ArgumentParser(description="arc-finetuning cli tool.")

Expand Down Expand Up @@ -117,6 +163,9 @@ def main() -> None:
default=None,
help="Previously started job id, to continue finetuning.",
)
finetune_parser.add_argument(
"--continue-latest", action=argparse.BooleanOptionalAction
)
finetune_parser.set_defaults(
func=lambda args: handle_finetune_job_submit(**vars(args))
)
Expand Down
48 changes: 15 additions & 33 deletions arc_finetuning_st/cli/finetune.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
from os import listdir
from pathlib import Path
from typing import Optional
Expand All @@ -13,7 +14,7 @@
)

FINETUNE_JSONL_FILENAME = "finetuning.jsonl"
FINETUNE_JOBS_FILENAME = "finetuning_jobs.txt"
FINETUNE_JOBS_FILENAME = "finetuning_jobs.jsonl"


def prepare_finetuning_jsonl_file(
Expand All @@ -32,21 +33,9 @@ def prepare_finetuning_jsonl_file(
def submit_finetune_job(
llm: str = "gpt-4o-2024-08-06",
start_job_id: Optional[str] = None,
json_path: Path = SINGLE_EXAMPLE_JSON_PATH,
assets_path: Path = FINETUNING_ASSETS_PATH,
) -> None:
"""Submit finetuning job."""

try:
with open(assets_path / FINETUNE_JOBS_FILENAME) as f:
lines = f.read().splitlines()
current_job_id = lines[-1]
except FileNotFoundError:
# no previous finetune model
current_job_id = None

start_job_id = current_job_id if current_job_id else start_job_id

finetune_engine = OpenAIFinetuneEngine(
llm,
(assets_path / FINETUNE_JSONL_FILENAME).as_posix(),
Expand All @@ -56,35 +45,28 @@ def submit_finetune_job(
finetune_engine.finetune()

with open(assets_path / FINETUNE_JOBS_FILENAME, "a+") as f:
f.write(finetune_engine._start_job.id)
metadata = {
"model": llm,
"start_job_id": finetune_engine._start_job.id,
}
json.dump(metadata, f)
f.write("\n")

print(finetune_engine.get_current_job())


def check_latest_job_status() -> None:
def check_job_status(
start_job_id: str,
llm: str = "gpt-4o-2024-08-06",
assets_path: Path = FINETUNING_ASSETS_PATH,
) -> None:
"""Print the current state of the finetuning job given by `start_job_id`.

Builds an `OpenAIFinetuneEngine` for `llm`, pointed at the finetuning JSONL
file under `assets_path` and attached to `start_job_id` (with
`validate_json=False`), then prints the result of its `get_current_job()`.

Args:
    start_job_id: Id of the previously submitted job to look up.
    llm: Model name passed to the finetune engine.
    assets_path: Directory that contains the finetuning JSONL file.
"""
try:
with open(FINETUNING_ASSETS_PATH / FINETUNE_JOBS_FILENAME) as f:
lines = f.read().splitlines()
current_job_id = lines[-1]
except FileNotFoundError:
raise ValueError(
"No finetuning_jobs.txt file exists. You likely haven't submitted a job yet."
)

finetune_engine = OpenAIFinetuneEngine(
"gpt-4o-2024-08-06",
(FINETUNING_ASSETS_PATH / FINETUNE_JSONL_FILENAME).as_posix(),
start_job_id=current_job_id,
llm,
(assets_path / FINETUNE_JSONL_FILENAME).as_posix(),
start_job_id=start_job_id,
validate_json=False,
)

print(finetune_engine.get_current_job())


if __name__ == "__main__":
FINETUNING_ASSETS_PATH.mkdir(exist_ok=True, parents=True)
prepare_finetuning_jsonl_file()
# submit_finetune_job()
check_latest_job_status()

0 comments on commit c8902bc

Please sign in to comment.