From b68912ce5a4894978bd55b277a6f5d5cd5b7564d Mon Sep 17 00:00:00 2001 From: Roman Sinkus Date: Thu, 5 Dec 2024 23:49:56 -0800 Subject: [PATCH] Parse model output using pre-determined keys. --- .gitignore | 2 +- backend/src/routes/transcription-route.js | 3 ++- transcription/app.py | 8 +++++-- transcription/keys.json | 18 +++++++++++++++ transcription/transcription.py | 27 +++++++++++++++++++++++ 5 files changed, 54 insertions(+), 4 deletions(-) create mode 100644 transcription/keys.json create mode 100644 transcription/transcription.py diff --git a/.gitignore b/.gitignore index 2969bf68..3be15fb0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ -venv/ +venv* *.pyc *.pyo *.pyd diff --git a/backend/src/routes/transcription-route.js b/backend/src/routes/transcription-route.js index fac5d1fb..39fc19d3 100644 --- a/backend/src/routes/transcription-route.js +++ b/backend/src/routes/transcription-route.js @@ -12,11 +12,12 @@ router.post("/", auth, async (req, res) => { } const imageFile = req.files.image; + // const keyFile = req.files.keys; // TODO: implement ability to send key file const formData = new FormData(); formData.append("image", imageFile.data, imageFile.name); const response = await axios.post( - "http://localhost:5000/transcribe", + "http://127.0.0.1:5000/transcribe", //TODO: change this endpoint after deploying formData, { headers: { diff --git a/transcription/app.py b/transcription/app.py index f46534b9..3d39b7b9 100644 --- a/transcription/app.py +++ b/transcription/app.py @@ -4,6 +4,8 @@ from transformers import AutoProcessor, AutoModelForCausalLM import torch +from transcription import load_keys, parse_florence_output + app = Flask(__name__) CORS(app) @@ -16,6 +18,7 @@ @app.route("/api/transcribe", methods=["POST"]) def transcribe(): + print("START OF ENDPOINT") if "image" not in request.files: return jsonify({"error": "No image file provided"}), 400 @@ -33,8 +36,9 @@ def transcribe(): do_sample=False ) generated_text = 
import json
import re


def load_keys(filePath):
    """Return the list of field labels stored in a JSON key file.

    The file must hold a JSON object with a top-level ``"keys"`` entry,
    e.g. ``{"keys": ["Case No.", "Patient ID"]}``.

    Args:
        filePath: Path of the JSON file to read.

    Returns:
        The value stored under ``"keys"``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If the document has no ``"keys"`` entry.
    """
    # JSON text is UTF-8 by specification; do not rely on the locale default.
    with open(filePath, 'r', encoding='utf-8') as file:
        data = json.load(file)
    return data['keys']


def parse_florence_output(output, keys):
    """Split model output into ``{label: value}`` pairs and return them as JSON.

    A field starts at a ``"<label>:"`` header and runs up to the next header
    (or the end of the text). Headers are matched as plain substrings, longest
    label first, so ``"Patient ID:"`` is never mistaken for ``"ID:"``, and a
    label word that merely appears inside a value (without a trailing colon)
    does not cut that value short.

    Args:
        output: The text to parse. A ``dict`` is serialised with
            ``json.dumps`` first and then parsed like any other string.
        keys: Iterable of label strings to look for. Empty labels are ignored.

    Returns:
        A JSON-formatted string (``indent=4``) mapping each label that was
        found to its stripped value. Labels appear in the order given by
        ``keys``; when a label occurs more than once, the first occurrence
        wins. Labels that are not present are omitted.
    """
    if isinstance(output, dict):
        output = json.dumps(output)  # convert to JSON-formatted string

    labels = [key for key in keys if key]
    if not labels:
        return json.dumps({}, indent=4)

    # One pattern for every header, compiled once. Longer labels come first
    # so that the alternation prefers "Patient ID" over "ID" at the same spot.
    alternation = '|'.join(
        re.escape(label)
        for label in sorted(set(labels), key=len, reverse=True)
    )
    header = re.compile(f"({alternation}):")

    headers = list(header.finditer(output))
    found = {}
    for index, match in enumerate(headers):
        # The value ends where the next header begins.
        if index + 1 < len(headers):
            value_end = headers[index + 1].start()
        else:
            value_end = len(output)
        # setdefault keeps the first occurrence of a repeated label.
        found.setdefault(match.group(1), output[match.end():value_end].strip())

    # Emit in the caller's key order, skipping labels that never appeared.
    parsed_data = {label: found[label] for label in labels if label in found}
    return json.dumps(parsed_data, indent=4)