# distance_estimator.py
import cv2 as cv
import numpy as np
import argparse
from face import FaceDet
from detectors.depth_midas import DepthEstimator
from detectors.card import CardDetector
from detectors.body import PersonDetector
from mono_calibrate import MonoCalibrator


class DistanceEstimator(object):
    def __init__(self, camera_parameters):
        self.camera = cv.VideoCapture(0)
        self.f_length = camera_parameters.f_monocal
        self.camMat = camera_parameters.camMat
        self.dist = camera_parameters.dist
        self.optimalMat = camera_parameters.optimalMat
        self.roi = camera_parameters.roi

    def stream(self, face, detector):
        print('Press "q" to exit.')
        self.cnt = 0
        while True:
            ok, frame = self.camera.read()
            if not ok:
                self.camera.release()
                break
            if cv.waitKey(1) & 0xff == ord('q'):
                self.camera.release()
                cv.destroyAllWindows()
                break
            # remove lens distortion from the frame
            undist = cv.undistort(frame, self.camMat, self.dist, None, self.optimalMat)
            # crop image to remove empty pixels
            x, y, w, h = self.roi
            undist = undist[y:y+h, x:x+w]
            undist = np.ascontiguousarray(undist)
            # run detection on the undistorted, cropped frame
            image, distance = self.detect(undist, self.f_length, face, detector)
            cv.imshow('Distance Estimation', image)
            cv.waitKey(1)

    def detect(self, frame, f_length, face, detector):
        # clear face mesh
        face.mesh = None
        # populate face object with keypoints
        detector.findIris(frame)
        # if a face is detected, base S2C distance on iris diameter
        if face.mesh is not None:
            if len(face.mesh) > 468:
                # visualize landmarks
                detector.visualize(frame, 'iris')
                # calculate median iris diameter (pixels)
                i_diameter = face.get_iris_diameter()
                # subject-to-camera distance (cm)
                s2c_dist = self.s2c_dist(f_length, face.w_iris, i_diameter)
                # convert from (cm) to (in)
                s2c_dist /= 2.54
                # write output to rgb frame
                message = f"S2C Distance (in): {round(s2c_dist, 2)}"
                messages = [message]
                self.write_messages(messages, frame)
                return frame, s2c_dist
        # if no face is detected by the iris model, use the holistic model;
        # S2C distance is then based on median head width relative to iris diameter
        elif face.mesh is None:
            detector.holistic(frame)
            if face.mesh is not None:
                detector.visualize(frame, 'face')
                pt1 = face.mesh[face.HEAD2[0]]
                pt2 = face.mesh[face.HEAD2[1]]
                face.get_headw(pt1, pt2, logging=False)
                # subject-to-camera distance (cm)
                s2c_dist = self.s2c_dist(f_length, face.head_w, face.head_pixw)
                # convert from (cm) to (in)
                s2c_dist /= 2.54
                message = 'Iris not detected. Using holistic face mesh.'
                message2 = f"S2C Distance (in): {round(s2c_dist, 2)}"
                messages = [message, message2]
                self.write_messages(messages, frame)
                return frame, s2c_dist
            # if no head points were detected, neither model found a person
            else:
                message = 'Body not detected.'
                self.write_messages([message], frame)
                return frame, None

    def s2c_dist(self, f, w_object, w_pix):
        '''
        Returns the subject-to-camera distance in cm using triangle similarity.
        f : focal length in pixels
        w_object : known width of the object in mm
        w_pix : object width in pixels
        '''
        # subject-to-camera distance (mm)
        s2c_d = (f * w_object) / w_pix
        # convert mm to cm
        s2c_d /= 10
        return s2c_d
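
    # Worked example (illustrative numbers, not taken from a real calibration):
    # with f = 600 px, a known iris width of 11.7 mm, and a measured iris
    # diameter of 14 px, s2c_dist(600, 11.7, 14) computes (600 * 11.7) / 14
    # = 501.4 mm and returns ~50.1 cm after the mm-to-cm conversion.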

    def write_messages(self, messages, frame):
        for idx, m in enumerate(messages):
            cv.putText(frame, m, (50, 50 + idx*50),
                       cv.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv.LINE_AA)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Monocular subject-to-camera distance estimation')
    parser.add_argument('--depth_model', help='Depth model used for card localization', default="DPT_Large")
    parser.add_argument('--feature_selector', help='Card feature description method: SIFT, ORB', default='SIFT')
    parser.add_argument('--checker_dims', help='Dimensions of the interior checkerboard corners', default=(6, 9))
    parser.add_argument('--calibrate', help='Initiates camera calibration', action=argparse.BooleanOptionalAction)
    parser.add_argument('--stream', help='Card detection in video stream', action=argparse.BooleanOptionalAction)
    parser.add_argument('--cameras', help='A dictionary with the name and bus id of each device', default={'camL': 0})
    args = parser.parse_args()

    ############################# do not remove
    # necessary to initialize the GUI by calling imshow before the card detector is initialized
    img = np.zeros((50, 50, 1), dtype=np.uint8)
    cv.imshow('img', img)
    cv.waitKey(1)
    cv.destroyWindow('img')
    ############################# end do not remove

    # calibration procedure
    if args.calibrate:
        # select a neural depth estimator
        # MiDaS v3 - Large (highest accuracy, slowest inference speed)
        estimator = DepthEstimator(args.depth_model)
        # load card detector
        card_detector = CardDetector(args.feature_selector)
        # face object holds face data
        face = FaceDet()
        monocal = MonoCalibrator(args.cameras, face, args.checker_dims)
        # MonoCalibrator calculates camera intrinsics and updates the face's iris width (mm)
        face = monocal.stream(card_detector, estimator)
        # logic to prevent abnormal iris diameters:
        # 12.5 mm is a threshold for diagnosing megalocornea,
        # 11.0 mm is a threshold for diagnosing microcornea;
        # outside these limits, assume measurement error and default to the mean iris width
        if face.w_iris > 12.4 or face.w_iris < 11:
            print(f'Measurement outside acceptable limits: {face.w_iris}\nDefaulting to 11.7 mm')
            face.w_iris = 11.7
        print('Monocular Focal Length Estimate:')
        print(f'focal length: {monocal.f_monocal}')
        # distance estimation tools
        detector = PersonDetector(face)
        d_estimator = DistanceEstimator(monocal)
        if args.stream:
            d_estimator.stream(face, detector)
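
# Example invocation (a sketch; assumes the repo's detectors/ package and
# mono_calibrate module are importable and a webcam is available at index 0):
#   python distance_estimator.py --calibrate --stream
# This runs the card/checkerboard calibration first, then opens the live
# distance-estimation window; press "q" to quit the stream.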