From 356ce01decec92a397e7a875e05558d17d1b9e50 Mon Sep 17 00:00:00 2001
From: Kirill Morozov
Date: Sun, 31 Mar 2024 14:14:30 -0400
Subject: [PATCH] Added BlazeFace model loading. Added webcam image crop for
 better recognition. TODO: use BlazeFace to find the square containing the
 face.

---
 app.py               | 21 +++++++++--
 smile_detector.py    | 48 +++++++++++++++++++-----
 templates/index.html | 88 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 144 insertions(+), 13 deletions(-)
 create mode 100644 templates/index.html

diff --git a/app.py b/app.py
index 617f7d5..f160973 100644
--- a/app.py
+++ b/app.py
@@ -1,13 +1,14 @@
-from flask import Flask, request
+from flask import Flask, request, render_template
 from smile_detector import SmileDetector
-from io import BytesIO
+from urllib.request import urlopen
+
 app = Flask(__name__)
 smile_detector = SmileDetector()
 
 
 @app.route('/')
 def hello_world():  # put application's code here
-    return 'Hello World!'
+    return render_template('index.html')
 
 @app.route("/has_smile", methods = ['POST'])
 def has_smile():
@@ -18,6 +19,7 @@ def has_smile():
         return "ok"
     return "ko"
 
+
 @app.route("/find_faces", methods = ['POST'])
 def find_faces():
     image_data = request.get_data()
@@ -27,5 +29,18 @@ def find_faces():
         return "ok"
     return "ko"
 
+@app.route("/has_smile_json", methods = ['POST'])
+def has_smile_json():
+    json_data = request.json
+    with urlopen(json_data['image']) as response:
+        image_data = response.read()
+
+    # image_data = requests.get(json_data['image'], stream=True).raw
+    res = smile_detector.smileCheck(image_data)
+
+    if res:
+        return "ok"
+    return "ko"
+
 if __name__ == '__main__':
     app.run()
diff --git a/smile_detector.py b/smile_detector.py
index b3155dd..3f208e4 100644
--- a/smile_detector.py
+++ b/smile_detector.py
@@ -1,6 +1,7 @@
 import torch
 from PIL import Image
 from model.lennon import LeNNon
+from model.blazeface.blazeface import BlazeFace
 from torchvision import transforms
 from io import BytesIO
 
@@ -9,16 +10,29 @@
 __main__.LeNNon=LeNNon
+
+
 class SmileDetector:
-    def __init__(self):
-        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    def initFaceDetectionModel(self):
+        modelDir = "model/blazeface/"
+        front_net = BlazeFace().to(self.device)
+        front_net.load_weights(modelDir + "blazeface.pth")
+        front_net.load_anchors(modelDir + "anchors.npy")
+        back_net = BlazeFace(back_model=True).to(self.device)
+        back_net.load_weights(modelDir + "blazefaceback.pth")
+        back_net.load_anchors(modelDir + "anchorsback.npy")
+        # Optionally change the thresholds:
+        front_net.min_score_thresh = 0.75
+        front_net.min_suppression_threshold = 0.3
+
+        self.faceDetectionModel = front_net
 
+    def initSmileModel(self):
         # Load the model and pass it to the proper device
         modelPath = 'model/LeNNon-Smile-Detector.pt'
-        model = torch.load(modelPath)
-        model = model.to(device)
-        model.eval()
+        smileModel = torch.load(modelPath)
+        smileModel = smileModel.to(self.device)
+        smileModel.eval()
 
         # This `transform` object will transform our test images into proper tensors
         transform = transforms.Compose([
@@ -26,23 +40,37 @@ def __init__(self):
             transforms.ToTensor(),
         ])
 
+        self.smileModel = smileModel
+        self.smileTransform = transform
+
+    def __init__(self):
+
+        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
         self.device = device
-        self.model = model
-        self.transform = transform
+        self.initSmileModel()
+        self.initFaceDetectionModel()
 
     def smileCheck(self, image_bytes: bytes):
         # Open and preprocess the image
         image_io = BytesIO(image_bytes)
         image = Image.open(image_io)
-        tensor = self.transform(image)
+        width, height = image.size  # Get dimensions
+        imageCrop = transforms.CenterCrop((min(width,height),min(width,height)))
+
+        tensor = self.smileTransform(imageCrop(image))
         tensor = tensor.to(self.device)
 
         # forward pass through the model
         with torch.no_grad():
-            outputs = self.model(tensor)
+            outputs = self.smileModel(tensor)
 
         # Get the class prediction
         _, predicted = torch.max(outputs.data, 1)
 
-        return predicted.item() > 0
\ No newline at end of file
+        return predicted.item() > 0
+
+    def findFaces(self, image_bytes: bytes):
+        # res = self.faceDetectionModel.predict_on_image()
+        pass
diff --git a/templates/index.html b/templates/index.html
new file mode 100644
index 0000000..6d82275
--- /dev/null
+++ b/templates/index.html
@@ -0,0 +1,88 @@
+    <title>Display Webcam Stream</title>
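
Note on the new endpoint: /has_smile_json expects a JSON body with an "image" field and fetches it with urlopen, so any URL that urllib can resolve should work, such as the data: URL a canvas.toDataURL() call would produce in a webcam capture page. A minimal client sketch, not part of the patch; the file name, host, and port are placeholders, and requests is just one possible HTTP client:

    import base64
    import requests  # any HTTP client works; requests is used here for brevity

    # Build a data: URL from a local JPEG, similar to what a webcam page could
    # send after canvas.toDataURL("image/jpeg"). "frame.jpg" is a placeholder.
    with open("frame.jpg", "rb") as f:
        data_url = "data:image/jpeg;base64," + base64.b64encode(f.read()).decode()

    # POST it to the Flask dev server started by `python app.py`.
    resp = requests.post("http://127.0.0.1:5000/has_smile_json", json={"image": data_url})
    print(resp.text)  # "ok" when a smile is detected, "ko" otherwise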
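
Note on the TODO (use BlazeFace to find the square containing the face): the stubbed findFaces could run the loaded front model and hand smileCheck a square face crop instead of the plain center crop. The sketch below is only one possible approach and is not part of the patch; it assumes the detection layout of the reference BlazeFace-PyTorch port (normalized ymin, xmin, ymax, xmax in the first four columns of predict_on_image) and a 128x128 RGB input for the front model.

    import numpy as np
    from io import BytesIO
    from PIL import Image


    def find_face_square(face_net, image_bytes: bytes):
        """Return a square PIL crop around the first BlazeFace detection, or None.

        `face_net` is a loaded BlazeFace front model, as built in
        initFaceDetectionModel(). The detection layout and the 128x128 input
        size are assumptions based on the reference BlazeFace-PyTorch port.
        """
        image = Image.open(BytesIO(image_bytes)).convert("RGB")
        width, height = image.size

        # Run detection on a 128x128 copy of the frame.
        detections = face_net.predict_on_image(np.asarray(image.resize((128, 128))))
        if len(detections) == 0:
            return None  # no face found

        # Map the first detection from normalized coordinates back to pixels.
        ymin, xmin, ymax, xmax = detections[0, :4].tolist()
        left, top = xmin * width, ymin * height
        right, bottom = xmax * width, ymax * height

        # Expand the box to a square, matching the square input the smile model expects.
        cx, cy = (left + right) / 2.0, (top + bottom) / 2.0
        side = max(right - left, bottom - top)
        return image.crop((cx - side / 2, cy - side / 2, cx + side / 2, cy + side / 2))

smileCheck could then fall back to the existing center crop whenever this returns None.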