"""
a simple demo script to show how to load different data given a sequence path
Author: Xianghui
Cite: BEHAVE: Dataset and Method for Tracking Human Object Interaction
"""
import sys, os
sys.path.append(os.getcwd())  # assumes the script is run from the repo root so local packages resolve
import cv2
import numpy as np
from tqdm import tqdm
from os.path import join, dirname, basename

# imports for the data loader and transformations between kinects
from data.frame_data import FrameDataReader
from data.kinect_transform import KinectTransform
# imports for rendering, you can replace them with your own code
from viz.pyt3d_wrapper import Pyt3DWrapper


def main(args):
    image_size = 1200
    w, h = image_size, int(image_size * 0.75)
    # FrameDataReader is the core class for reading the dataset
    reader = FrameDataReader(args.seq_folder)
    # handles transformations between the different kinect color cameras;
    # the constructor loads the calibration info and kinect intrinsics
    kinect_transform = KinectTransform(args.seq_folder, kinect_count=reader.kinect_count)

    # subfolder names for loading fitting results
    smpl_name = args.smpl_name
    obj_name = args.obj_name

    pyt3d_wrapper = Pyt3DWrapper(image_size=(w, h))
    outdir = args.viz_dir
    seq_save_path = join(outdir, reader.seq_name)
    os.makedirs(seq_save_path, exist_ok=True)
    seq_end = reader.cvt_end(args.end)
    # mask_video_paths = [join(seq_save_path, f'mask_k{x}.mp4') for x in reader.seq_info.kids]
    rend_video_path = join(seq_save_path, f'smpl_{smpl_name}_obj_{obj_name}_s{args.start}_e{seq_end}.mp4')
    video_writer = None
    loop = tqdm(range(args.start, seq_end))
    loop.set_description(reader.seq_name)
    for i in loop:
        # load the SMPL and object fit meshes
        smpl_fit = reader.get_smplfit(i, smpl_name)
        obj_fit = reader.get_objfit(i, obj_name)
        if smpl_fit is None or obj_fit is None:
            print('no fitting result for frame: {}'.format(reader.frame_time(i)))
            continue
        fit_meshes = [smpl_fit, obj_fit]
        # get all color images of this frame
        kids = [1, 2]  # choose which kinect ids to visualize
        imgs_all = reader.get_color_images(i, reader.kids)
        imgs_resize = [cv2.resize(x, (w, h)) for x in imgs_all]
        overlaps = [imgs_resize[1]]
        selected_imgs = [imgs_resize[x] for x in kids]  # render the fitting only in the selected views
        for orig, kid in zip(selected_imgs, kids):
            # transform the fitted meshes from world coordinates to the local color camera
            # coordinates; the same transform applies to point clouds
            fit_meshes_local = kinect_transform.world2local_meshes(fit_meshes, kid)
            # render the meshes
            rend = pyt3d_wrapper.render_meshes(fit_meshes_local, viz_contact=args.viz_contact)
            h, w = orig.shape[:2]
            overlap = cv2.resize((rend * 255).astype(np.uint8), (w, h))
            cv2.putText(overlap, f'kinect {kid}', (w // 3, 30), cv2.FONT_HERSHEY_PLAIN, 2, (0, 255, 255), 2)
            overlaps.append(overlap)
        comb = np.concatenate(overlaps, 1)
        cv2.putText(comb, reader.frame_time(i), (w // 3, 30), cv2.FONT_HERSHEY_PLAIN, 2, (0, 255, 255), 2)
        if video_writer is None:
            ch, cw = comb.shape[:2]
            # 'mp4v' fourcc (equivalent to the magic constant 0x7634706d), written at 3 fps
            video_writer = cv2.VideoWriter(rend_video_path, cv2.VideoWriter_fourcc(*'mp4v'), 3, (cw, ch))
        video_writer.write(cv2.cvtColor(comb, cv2.COLOR_RGB2BGR))

        # load the person and object point clouds, returned as psbody.Mesh.
        # the convert flag keeps the names compatible with detectron2 classes: detectron2 classifies
        # every chair as 'chair', so chair point clouds are saved in the subfolder 'chair'; likewise,
        # yogaball and basketball are both classified as 'sports ball'.
        # obj_pc = reader.get_pc(i, 'obj', convert=True)
        # person_pc = reader.get_pc(i, 'person')

        # load the person and object masks
        # for kid, rgb, writer in zip(kids, imgs_all, video_writers):
        #     obj_mask = np.zeros_like(rgb).astype(np.uint8)
        #     mask = reader.get_mask(i, kid, 'obj', ret_bool=True)
        #     if mask is None:
        #         continue  # the mask can be None if there is no fitting in this frame
        #     obj_mask[mask] = np.array([255, 0, 0])
        #
        #     person_mask = np.zeros_like(rgb).astype(np.uint8)
        #     mask = reader.get_mask(i, kid, 'person', ret_bool=True)
        #     person_mask[mask] = np.array([255, 0, 0])
        #
        #     comb = np.concatenate([rgb, person_mask, obj_mask], 1)
        #     ch, cw = comb.shape[:2]
        #     writer.append_data(cv2.resize(comb, (cw // 3, ch // 3)))

    # the writer is only created once the first fitted frame is found, so it may still be None
    if video_writer is not None:
        video_writer.release()


if __name__ == '__main__':
    from argparse import ArgumentParser

    parser = ArgumentParser()
    parser.add_argument('-s', '--seq_folder', help='path to one BEHAVE sequence folder')
    parser.add_argument('-sn', '--smpl_name', default='fit02',
                        help='SMPL fitting save name; for the final dataset, use fit02')
    parser.add_argument('-on', '--obj_name', default='fit01',
                        help='object fitting save name; for the final dataset, use fit01')
    parser.add_argument('-fs', '--start', type=int, default=0, help='start from which frame')
    parser.add_argument('-fe', '--end', type=int, default=None, help='end at which frame')
    parser.add_argument('-v', '--viz_dir', default="/BS/xxie-4/work/viz",
                        help='path to save your visualization videos')
    parser.add_argument('-vc', '--viz_contact', default=False, action='store_true',
                        help='visualize contact spheres or not')

    args = parser.parse_args()
    main(args)
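
# Example invocation (the sequence path below is a placeholder; point -s at any extracted
# BEHAVE sequence folder). Run it from the repository root so that the `data` and `viz`
# modules resolve via the sys.path.append(os.getcwd()) call at the top of this script:
#   python behave_demo.py -s /path/to/behave/sequences/Date01_Sub01_backpack_back -v ./viz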