Skip to content
This repository has been archived by the owner on Dec 9, 2024. It is now read-only.

Commit

Permalink
Small fixes in preprocess.py and /static/js/preprocess.js
Browse files Browse the repository at this point in the history
  • Loading branch information
lowkw committed Jun 18, 2024
1 parent b0c4c65 commit 9fe2019
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 10 deletions.
28 changes: 20 additions & 8 deletions app/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,17 +115,26 @@ def clean_code_text(text):
return '\n'.join(cleaned_lines)


def extract_code(text, language=None):
    """
    Extract the code from an AI response, which usually wraps it in backticks.

    Only the first fenced block is used. If the block (or the unfenced text)
    starts with a line that is just the language name, e.g. "```python", that
    tag line is dropped. The language name is never removed from the code itself.

    :param text: The AI response
    :param language: The programming language the code is in (optional, case-insensitive)
    :return: The extracted text or None
    """
    # "ERROR" is treated as a failed response (presumably what the query
    # helper returns on failure - confirm); a missing/empty response is too.
    if not text or text == "ERROR":
        return None

    if "```" in text:
        match = re.search(r"```(.*?)```", text, re.DOTALL)
        if not match:
            # An opening fence without a closing one: nothing reliable to extract.
            return None
        code = match.group(1).strip()
    else:
        # The response is not fenced, so the whole text is taken as the code.
        code = text

    # Drop the fence's language tag. Compare the whole first line rather than
    # using str.replace, which would delete the language name everywhere
    # (for "c" that would be every letter "c" in the code).
    wanted = (language or "").strip().lower()
    if wanted:
        first_line, _, remainder = code.partition("\n")
        if first_line.strip().lower() == wanted:
            # Remove only the blank lines after the tag so that the
            # indentation of the first real code line is preserved.
            code = remainder.lstrip("\r\n")

    return code or None


def get_full_code(code_json):
Expand Down Expand Up @@ -203,13 +212,16 @@ def scan_video_for_code_frames(filename, llama_endpoint, interval_seconds=5, pro
socketio.emit("processingProgressUpdate", f"{math.floor((frame_count / total_frames) * 100)}%")
text = extract_text_from_frame(frame)
is_code = to_query(llama_endpoint, is_code_prompt, text)
# print(is_code)
if is_code and is_code.strip().lower() == 'true':
formatted_code, code_explanation = None, None
timestamp = frame_count / fps
print(f"Code detected at timestamp: {timestamp}")

if provide_formatted_code:
formatted_code = extract_code(to_query(llama_endpoint, format_code_prompt, text))
not_formatted = to_query(llama_endpoint, format_code_prompt, text)
formatted_code = extract_code(not_formatted, programming_language)
# print(not_formatted, formatted_code)

if not formatted_code:
print(f"Could not extract code from frame {frame_count}")
Expand All @@ -229,7 +241,7 @@ def scan_video_for_code_frames(filename, llama_endpoint, interval_seconds=5, pro

if len(code_frames) >= 1 and provide_formatted_code:
formatted = to_query(llama_endpoint, provide_full_code_prompt, get_full_code(code_frames))
full_code = extract_code(formatted)
full_code = extract_code(formatted, programming_language)

socketio.emit("finishedProcessing", full_code)
cap.release()
Expand Down
4 changes: 2 additions & 2 deletions app/static/js/preprocess.js
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ function nextTimestamp() {
let closestDiff = Infinity;

timestampsArr.forEach(ts => {
if (ts.seconds > currentTime) {
const difference = Math.abs(currentTime - ts.seconds);
if (Math.floor(ts.seconds) > Math.floor(currentTime)) {
const difference = Math.abs(Math.floor(currentTime) - Math.floor(ts.seconds));
if (difference < closestDiff) {
closestDiff = difference;
closest = ts;
Expand Down

0 comments on commit 9fe2019

Please sign in to comment.