-
Notifications
You must be signed in to change notification settings - Fork 0
/
local.py
137 lines (116 loc) · 5.08 KB
/
local.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import cv2
from torchvision import transforms
import requests
from collections import deque
import numpy as np
import pickle # For efficient serialization
class GestureRecognizer:
    """Capture webcam frames and ask a remote server to classify the gesture.

    Frames from the default camera are mirrored, converted to normalised
    tensors and kept in a sliding window of ``num_frames`` entries. Each time
    a new frame is added to a full window, the window is pickled and POSTed to
    ``server_url``; the label returned by the server is drawn on the preview.

    Args:
        labels: Gesture class names. Only stored on the instance here; the
            label shown on screen is whatever the server returns.
        resize: Target size handed to ``transforms.Resize``.
        num_frames: Number of frames in the sliding window sent per request.
        drop_frame: Number of captured frames skipped between two frames that
            are kept for inference (0 keeps every frame).
        server_url: URL of the inference endpoint.
        request_timeout: Seconds to wait for the server before a request is
            abandoned and reported as ``"Error"``.
    """

    def __init__(
        self,
        labels,
        resize=(112, 112),
        num_frames=24,
        drop_frame=0,
        server_url="http://127.0.0.1:5000/infer",
        request_timeout=10.0,
    ):
        self.labels = labels
        self.resize = resize
        self.num_frames = num_frames
        self.drop_frame = drop_frame
        self.server_url = server_url
        self.request_timeout = request_timeout
        print("Gesture recognizer initialized.")

    @staticmethod
    def _pack_clip(frames):
        """Stack per-frame (C, H, W) arrays into a pickled (1, C, T, H, W) clip."""
        # axis=1 inserts the time dimension after the channels: (C, T, H, W).
        clip = np.stack([np.asarray(f) for f in frames], axis=1)
        # NOTE(security): the receiving endpoint presumably calls pickle.loads
        # on this body; unpickling network input can execute arbitrary code,
        # so the server should only be reachable by trusted clients.
        return pickle.dumps(clip[np.newaxis, ...])

    def _request_prediction(self, frames):
        """POST the current window to the server and return the label to show.

        Returns the server's ``predicted_label`` field (``"Unknown"`` when the
        field is absent) or ``"Error"`` when the request or decoding fails.
        """
        try:
            response = requests.post(
                self.server_url,
                data=self._pack_clip(frames),
                headers={"Content-Type": "application/octet-stream"},
                # Without a timeout a stalled server would freeze the preview.
                timeout=self.request_timeout,
            )
            if response.status_code != 200:
                print(f"Error from server: {response.text}")
                return "Error"
            predicted_label = response.json().get("predicted_label", "Unknown")
        except Exception as e:
            # Deliberately broad: a failed request must never kill the
            # capture loop, it is only reported and shown as "Error".
            print(f"Error in communication with server: {e}")
            return "Error"
        print(f"Prediction: {predicted_label}")
        return predicted_label

    def run(self):
        """Run the capture / inference / preview loop until 'q' is pressed.

        Raises:
            RuntimeError: If the default camera cannot be opened.
        """
        transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Resize(self.resize),
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
        ])

        # Keep one frame out of every ``stride``; never let it drop below 1 so
        # a bad ``drop_frame`` cannot cause a modulo-by-zero.
        stride = max(1, self.drop_frame + 1)

        cap = cv2.VideoCapture(0)
        if not cap.isOpened():
            cap.release()
            raise RuntimeError("Failed to open the default camera.")

        frames = deque(maxlen=self.num_frames)
        frame_count = 0
        # No label exists until the first full window has been classified.
        predicted_label = None
        font = cv2.FONT_HERSHEY_SIMPLEX

        print("Starting gesture recognition. Press 'q' to quit.")
        try:
            while True:
                # Always read, so that skipped frames are really consumed from
                # the camera instead of merely spinning the loop.
                ret, frame = cap.read()
                if not ret or frame is None:
                    print("Failed to capture frame. Exiting...")
                    break
                frame_count += 1

                # Mirror the image so the preview behaves like a mirror.
                frame = cv2.flip(frame, 1)

                if frame_count % stride == 0:
                    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    frames.append(transform(frame_rgb))
                    # The deque is bounded, so once full it stays full and
                    # every newly kept frame triggers one request.
                    if len(frames) == self.num_frames:
                        predicted_label = self._request_prediction(frames)

                if predicted_label is not None:
                    # Use the real frame height; the capture property may be
                    # 0 or differ from what the driver actually delivers.
                    bottom_left_corner = (5, frame.shape[0] - 10)
                    cv2.putText(
                        frame,
                        f"{predicted_label}",
                        bottom_left_corner,
                        font,
                        0.5,
                        (0, 0, 255),
                        1,
                        cv2.LINE_AA,
                    )

                cv2.imshow('Frame', frame)

                # Press Q on the keyboard to stop.
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    print("Exiting gesture recognition...")
                    break
        finally:
            # Release the camera and windows even if the loop raised.
            cap.release()
            cv2.destroyAllWindows()
def main():
    """Build the gesture label list and run a recognizer on a 30-frame window."""
    gesture_names = [
        'Swiping Left',
        'Swiping Right',
        'Swiping Down',
        'Swiping Up',
        'Pushing Hand Away',
        'Pulling Hand In',
        'Sliding Two Fingers Left',
        'Sliding Two Fingers Right',
        'Sliding Two Fingers Down',
        'Sliding Two Fingers Up',
        'Pushing Two Fingers Away',
        'Pulling Two Fingers In',
        'Rolling Hand Forward',
        'Rolling Hand Backward',
        'Turning Hand Clockwise',
        'Turning Hand Counterclockwise',
        'Zooming In With Full Hand',
        'Zooming Out With Full Hand',
        'Zooming In With Two Fingers',
        'Zooming Out With Two Fingers',
        'Thumb Up',
        'Thumb Down',
        'Shaking Hand',
        'Stop Sign',
        'Drumming Fingers',
        'No gesture',
        'Doing other things',
    ]

    # Every captured frame is kept (drop_frame=0); the window holds 30 frames.
    recognizer = GestureRecognizer(labels=gesture_names, num_frames=30, drop_frame=0)
    recognizer.run()


# Start the recognizer only when this file is executed as a script.
if __name__ == '__main__':
    main()