Skip to content

Commit

Permalink
make chunk size smaller, add seed and update openai python lib
Browse files Browse the repository at this point in the history
  • Loading branch information
sh-rp committed Sep 17, 2024
1 parent c74e051 commit 4c00c9b
Show file tree
Hide file tree
Showing 3 changed files with 199 additions and 14 deletions.
18 changes: 16 additions & 2 deletions docs/tools/fix_grammar_gpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
# constants
BASE_DIR = "../website/docs"
GPT_MODEL = "gpt-4o-2024-05-13"
MAX_CHUNK_SIZE = 14000 # keep this below the model's context window size so that files are not cut off
MAX_CHUNK_SIZE = 4000 # keep this below the model's context window size so that files are not cut off

SYSTEM_PROMPT = """\
You are a grammar checker. Every message you get will be a document that is to be grammarchecked and returned as such.
Expand Down Expand Up @@ -112,17 +112,31 @@ def get_chunk_length(chunk: List[str]) -> int:
fixed_chunks: List[str] = []
for chunk in chunks:
client = OpenAI()
input = "".join(chunk)
response = client.chat.completions.create(
seed=1239812398,
model=GPT_MODEL,
messages=[
{"role": "system", "content": SYSTEM_PROMPT},
{"role": "user", "content": "".join(chunk)},
{"role": "user", "content": input},
],
temperature=0,
)

print(response)
fixed_chunks.append(response.choices[0].message.content) # type: ignore

print("=========")
print("INPUT")
print("=========")
print(input)
print("=========")
print("OUTPUT")
print("=========")

print(fixed_chunks[0])
exit(0)

with open(file_path, "w", encoding="utf-8") as f:
for c in fixed_chunks:
f.write(c)
Expand Down
Loading

0 comments on commit 4c00c9b

Please sign in to comment.