diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index 1d44a91..0000000 Binary files a/.DS_Store and /dev/null differ diff --git a/app/app.py b/app/app.py index ea60e33..5b315cc 100644 --- a/app/app.py +++ b/app/app.py @@ -22,6 +22,7 @@ socketio = SocketIO(app) + @app.context_processor def utility_processor(): """ @@ -206,7 +207,7 @@ def video(play_filename): global filename filename = play_filename video_data = utils.get_video_data(filename) - if video_data['processed'] == False and video_data['processing'] == False: + if video_data['processed'] is False and video_data['processing'] is False: print(filename) threading.Thread(target=pre_process.process_video, args=(str(filename), socketio)).start() return render_template("player.html", filename=filename, video_data=video_data) diff --git a/app/extract_text.py b/app/extract_text.py index c059b7c..edf18db 100644 --- a/app/extract_text.py +++ b/app/extract_text.py @@ -48,7 +48,7 @@ def format_raw_ocr_string(extracted_text: str) -> str: formatted_text = extracted_text print(formatted_text) if config("Formatting", "openai_analysis"): - if(openai.api_key == None or openai.api_key == ""): + if (openai.api_key is None or openai.api_key == ""): prompt = ExtractText.formatted_prompt(formatted_text, language) formatted_text = Llama.query(prompt) else: @@ -62,10 +62,12 @@ def format_raw_ocr_string(extracted_text: str) -> str: @staticmethod def formatted_prompt(extracted_text: str, language: str) -> str: return f"Analyse the following {language} code snippet:\n\n{extracted_text}\n\n" \ - f"If no '{language}' code is present, say 'No Code' and disregard the remaining prompt otherwise if '{language}' code is detected," \ - "If The Code is incomplete or has errors then prfix with 'Incomplete Code'" \ - f"correct any basic syntax errors, indentation errors, but do not add any code that does not exist in the sample and make sure to preserve comments" \ - f"Do NOT return any explanations, only code. Do NOT return leading or trailing backticks " + f"If no '{language}' code is present, say 'No Code' and disregard the remaining prompt otherwise if" \ + f"'{language}' code is detected," \ + "If The Code is incomplete or has errors then prefix with 'Incomplete Code'" \ + "correct any basic syntax errors, indentation errors, but do not add any code that does not exist" \ + "in the sample and make sure to preserve comments" \ + f"Do NOT return any explanations, only code. Do NOT return leading or trailing backticks" @staticmethod def extract_frame_at_timestamp(filename: str, timestamp: float) -> Union[cv2.VideoCapture, None]: diff --git a/app/pre_process.py b/app/pre_process.py index 311f9b2..9e3357b 100644 --- a/app/pre_process.py +++ b/app/pre_process.py @@ -1,4 +1,4 @@ -from utils import config, get_vid_save_path, update_user_video_data, get_video_data +from utils import get_vid_save_path, update_user_video_data, get_video_data import cv2 from PIL import Image import pytesseract @@ -7,14 +7,15 @@ from utils import config import time + def run_ocr(ret, frame): temp_frame = frame - if ret == True: + if ret is True: cv2.imwrite('temp.png', temp_frame) return pytesseract.image_to_string(Image.open('temp.png')) else: return None - + def formatted_prompt() -> str: return f"Analyse the following %LANGUAGE% code snippet:\n\n%QUESTION%\n\n" \ f"If no '%LANGUAGE%' code is present, say 'No Code' and disregard the remaining prompt. Otherwise if '%LANGUAGE%' code is detected:" \ @@ -22,6 +23,7 @@ def formatted_prompt() -> str: f"correct any indentation errors, but do not add any code that does not exist in the sample and make sure to preserve comments" \ f"Do NOT embellish the code, simply return the code as a codeblock or 'No Code'" + def seconds_to_timestamp(seconds): minutes = seconds // 60 seconds = seconds % 60 @@ -31,7 +33,7 @@ def seconds_to_timestamp(seconds): def process_video(video_file_name, socketio): print(f"Processing video {video_file_name}") cap = cv2.VideoCapture(get_vid_save_path() + video_file_name) - if not cap.isOpened(): + if not cap.isOpened(): print("Error opening video file") Llama.set_prompt(formatted_prompt()) language = config("UserSettings", "programming_language") @@ -51,21 +53,21 @@ def process_video(video_file_name, socketio): text = run_ocr(*cap.read()) response = Llama.query_with_default(text, language) #print(response) - if("```" in response): + if "```" in response: response = response.split("```")[1] print(response) - if("No Code" not in response and step_seconds not in steps_with_code): #Did we find code? + if "No Code" not in response and step_seconds not in steps_with_code: # Did we find code? dictEntry = {'timestamp': step_seconds, 'capture_content': response} update_user_video_data(video_file_name, None, dictEntry) steps_with_code.append(step_seconds) socketio.emit('update_timestamps', data=video_file_name) - if(was_last_step_code == False): #If we didn't find code last time, we want to skip back a bit + if not was_last_step_code: # If we didn't find code last time, we want to skip back a bit step_seconds -= 4 - else: #If we did find code last time, we want to skip forward a bit - step_seconds += 1 + else: # If we did find code last time, we want to skip forward a bit + step_seconds += 1 was_last_step_code = True - else: #We didn't find code skip forward + else: # We didn't find code skip forward step_seconds += 5 was_last_step_code = False update_user_video_data(video_file_name, None, None, True, False) @@ -96,4 +98,4 @@ def format_user_data(video_file_name, timestamp, dictEntry): video_data['captures'].sort(key=lambda x: x['timestamp']) # Save updated video data - update_user_video_data(video_file_name, timestamp, video_data) \ No newline at end of file + update_user_video_data(video_file_name, timestamp, video_data) diff --git a/app/remote_llama.py b/app/remote_llama.py index 2c0edd9..60775c5 100644 --- a/app/remote_llama.py +++ b/app/remote_llama.py @@ -1,5 +1,4 @@ import requests -import json import time