From 22e08e9f90e11e5162e51763bee75b904db356eb Mon Sep 17 00:00:00 2001 From: okan Date: Tue, 5 Feb 2019 16:25:30 +0100 Subject: [PATCH] bug in online test fixed --- datasets/egogesture_online.py | 7 +- model.py | 4 +- offline_test.py | 256 +++++++++++++++ online_test.py | 377 +++++++++++++++++++++ opts.py | 80 ++--- real_time_test.py | 594 ---------------------------------- results/opts.json | 2 +- run_offline.sh | 21 +- run_online.sh | 22 +- utils.py | 74 ++++- 10 files changed, 779 insertions(+), 658 deletions(-) create mode 100644 offline_test.py create mode 100644 online_test.py delete mode 100644 real_time_test.py diff --git a/datasets/egogesture_online.py b/datasets/egogesture_online.py index f332bf2..09327ba 100644 --- a/datasets/egogesture_online.py +++ b/datasets/egogesture_online.py @@ -232,7 +232,12 @@ def __getitem__(self, index): clip = [self.spatial_transform(img) for img in clip] im_dim = clip[0].size()[-2:] - clip = torch.cat(clip, 0).view((self.sample_duration, -1) + im_dim).permute(1, 0, 2, 3) + try: + clip = torch.cat(clip, 0).view((self.sample_duration, -1) + im_dim).permute(1, 0, 2, 3) + except Exception as e: + pdb.set_trace() + raise e + # clip = torch.stack(clip, 0).permute(1, 0, 2, 3) target = self.data[index] diff --git a/model.py b/model.py index 1cedb13..ab61eb8 100644 --- a/model.py +++ b/model.py @@ -118,9 +118,9 @@ def generate_model(opt): elif opt.modality == 'Depth': print("[INFO]: Converting the pretrained model to Depth init model") model = _construct_depth_model(model) - print("[INFO]: Done. Flow model ready.") + print("[INFO]: Deoth model ready.") elif opt.modality == 'RGB-D': - print("[INFO]: Converting the pretrained model to RGB+D init model") + print("[INFO]: Converting the pretrained model to RGB-D init model") model = _construct_rgbdepth_model(model) print("[INFO]: Done. 
RGB-D model ready.") diff --git a/offline_test.py b/offline_test.py new file mode 100644 index 0000000..f10c692 --- /dev/null +++ b/offline_test.py @@ -0,0 +1,256 @@ +import argparse +import time +import os +import sys +import json +import shutil +import numpy as np +import matplotlib.pyplot as plt +import seaborn as sns +import itertools +import torch +from torch.autograd import Variable +from sklearn.metrics import confusion_matrix +from torch.nn import functional as F + +from opts import parse_opts_offline +from model import generate_model +from mean import get_mean, get_std +from spatial_transforms import * +from temporal_transforms import * +from target_transforms import ClassLabel, VideoID +from target_transforms import Compose as TargetCompose +from dataset import get_training_set, get_validation_set, get_test_set, get_online_data +from utils import Logger +from train import train_epoch +from validation import val_epoch +import test +from utils import AverageMeter, calculate_precision, calculate_recall +import pdb +from sklearn.metrics import confusion_matrix + +def plot_cm(cm, classes, normalize = True): + import seaborn as sns + if normalize: + cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] + print("Normalized confusion matrix") + else: + print('Confusion matrix, without normalization') + + ax= plt.subplot() + sns.heatmap(cm, annot=False, ax = ax); #annot=True to annotate cells + + # labels, title and ticks + ax.set_xlabel('Predicted labels');ax.set_ylabel('True labels'); + plt.xticks(rotation='vertical') + plt.yticks(rotation='horizontal') + + + +def calculate_accuracy(outputs, targets, topk=(1,)): + maxk = max(topk) + batch_size = targets.size(0) + _, pred = outputs.topk(maxk, 1, True, True) + pred = pred.t() + correct = pred.eq(targets.view(1, -1).expand_as(pred)) + ret = [] + for k in topk: + correct_k = correct[:k].float().sum().item() + ret.append(correct_k / batch_size) + + return ret + + +opt = parse_opts_offline() +if opt.root_path != '': + opt.video_path = os.path.join(opt.root_path, opt.video_path) + opt.annotation_path = os.path.join(opt.root_path, opt.annotation_path) + opt.result_path = os.path.join(opt.root_path, opt.result_path) + if opt.resume_path: + opt.resume_path = os.path.join(opt.root_path, opt.resume_path) + if opt.pretrain_path: + opt.pretrain_path = os.path.join(opt.root_path, opt.pretrain_path) +opt.scales = [opt.initial_scale] +for i in range(1, opt.n_scales): + opt.scales.append(opt.scales[-1] * opt.scale_step) +opt.arch = '{}-{}'.format(opt.model, opt.model_depth) +opt.mean = get_mean(opt.norm_value) +opt.std = get_std(opt.norm_value) + +print(opt) +with open(os.path.join(opt.result_path, 'opts.json'), 'w') as opt_file: + json.dump(vars(opt), opt_file) + +torch.manual_seed(opt.manual_seed) + +model, parameters = generate_model(opt) +print(model) +pytorch_total_params = sum(p.numel() for p in model.parameters() if + p.requires_grad) +print("Total number of trainable parameters: ", pytorch_total_params) + +if opt.no_mean_norm and not opt.std_norm: + norm_method = Normalize([0, 0, 0], [1, 1, 1]) +elif not opt.std_norm: + norm_method = Normalize(opt.mean, [1, 1, 1]) +else: + norm_method = Normalize(opt.mean, opt.std) + + +spatial_transform = Compose([ + #Scale(opt.sample_size), + Scale(112), + CenterCrop(112), + ToTensor(opt.norm_value), norm_method + ]) +temporal_transform = TemporalCenterCrop(opt.sample_duration) +#temporal_transform = TemporalBeginCrop(opt.sample_duration) +#temporal_transform = TemporalEndCrop(opt.sample_duration) 
+target_transform = ClassLabel() +test_data = get_test_set( + opt, spatial_transform, temporal_transform, target_transform) + +test_loader = torch.utils.data.DataLoader( + test_data, + batch_size=opt.batch_size, + shuffle=False, + num_workers=opt.n_threads, + pin_memory=True) +test_logger = Logger(os.path.join(opt.result_path, 'test.log'), + [ 'top1', 'top5', 'precision', 'recall']) + + +if opt.resume_path: + print('loading checkpoint {}'.format(opt.resume_path)) + checkpoint = torch.load(opt.resume_path) + assert opt.arch == checkpoint['arch'] + + opt.begin_epoch = checkpoint['epoch'] + model.load_state_dict(checkpoint['state_dict']) + + +#test.test(test_loader, model, opt, test_data.class_names) + + + +recorder = [] + +print('run') + +model.eval() + +batch_time = AverageMeter() +top1 = AverageMeter() +top5 = AverageMeter() +precisions = AverageMeter() # +recalls = AverageMeter() + +y_true = [] +y_pred = [] +end_time = time.time() +for i, (inputs, targets) in enumerate(test_loader): + if not opt.no_cuda: + targets = targets.cuda(async=True) + #inputs = Variable(torch.squeeze(inputs), volatile=True) + with torch.no_grad(): + inputs = Variable(inputs) + targets = Variable(targets) + outputs = model(inputs) + if not opt.no_softmax_in_test: + outputs = F.softmax(outputs) + recorder.append(outputs.data.cpu().numpy().copy()) + y_true.extend(targets.cpu().numpy().tolist()) + y_pred.extend(outputs.argmax(1).cpu().numpy().tolist()) + + #outputs = torch.unsqueeze(torch.mean(outputs, 0), 0) + #pdb.set_trace() + # print(outputs.shape, targets.shape) + if outputs.size(1) <= 4: + + prec1= calculate_accuracy(outputs, targets, topk=(1,)) + precision = calculate_precision(outputs, targets) # + recall = calculate_recall(outputs,targets) + + + top1.update(prec1[0], inputs.size(0)) + precisions.update(precision, inputs.size(0)) + recalls.update(recall,inputs.size(0)) + + batch_time.update(time.time() - end_time) + end_time = time.time() + + + + print('[{0}/{1}]\t' + 'Time {batch_time.val:.5f} ({batch_time.avg:.5f})\t' + 'prec@1 {top1.avg:.5f} \t' + 'precision {precision.val:.5f} ({precision.avg:.5f})\t' + 'recall {recall.val:.5f} ({recall.avg:.5f})'.format( + i + 1, + len(test_loader), + batch_time=batch_time, + top1 =top1, + precision = precisions, + recall = recalls)) + else: + + prec1, prec5 = calculate_accuracy(outputs, targets, topk=(1,5)) + precision = calculate_precision(outputs, targets) # + recall = calculate_recall(outputs,targets) + + + top1.update(prec1, inputs.size(0)) + top5.update(prec5, inputs.size(0)) + precisions.update(precision, inputs.size(0)) + recalls.update(recall,inputs.size(0)) + + batch_time.update(time.time() - end_time) + end_time = time.time() + print('[{0}/{1}]\t' + 'Time {batch_time.val:.5f} ({batch_time.avg:.5f})\t' + 'prec@1 {top1.avg:.5f} prec@5 {top5.avg:.5f}\t' + 'precision {precision.val:.5f} ({precision.avg:.5f})\t' + 'recall {recall.val:.5f} ({recall.avg:.5f})'.format( + i + 1, + len(test_loader), + batch_time=batch_time, + top1 =top1, + top5=top5, + precision = precisions, + recall = recalls)) +test_logger.log({ + 'top1': top1.avg, + 'top5': top5.avg, + 'precision':precisions.avg, + 'recall':recalls.avg + }) + +""" +video_pred = [np.argmax(np.mean(x, axis=0)) for x in recorder] +print(video_pred) + +with open('annotation_Jester/categories.txt') as f: + lines = f.readlines() + categories = [item.rstrip() for item in lines] + +name_list = [x.strip().split()[0] for x in open('annotation_Jester/testlist01.txt')] +order_dict = {e:i for i, e in enumerate(sorted(name_list))} 
+reorder_output = [None] * len(recorder) +reorder_pred = [None] * len(recorder) +output_csv = [] +for i in range(len(recorder)): + idx = order_dict[name_list[i]] + reorder_output[idx] = recorder[i] + reorder_pred[idx] = video_pred[i] + output_csv.append('%s;%s'%(name_list[i], + categories[video_pred[i]])) + + with open('output_predictions.csv','w') as f: + f.write('\n'.join(output_csv)) +""" + +# cm = confusion_matrix(y_true, y_pred) +# plot_cm(cm, np.unique(y_true).tolist()) +# plt.savefig(opt.dataset + '_confusion_matrix2.png', dpi = 180) +print('-----Evaluation is finished------') +print('Overall Prec@1 {:.05f}% Prec@5 {:.05f}%'.format(top1.avg, top5.avg)) diff --git a/online_test.py b/online_test.py new file mode 100644 index 0000000..cb467a6 --- /dev/null +++ b/online_test.py @@ -0,0 +1,377 @@ +import argparse +import time +import os +import glob +import sys +import json +import shutil +import itertools +import numpy as np +import pandas as pd +import csv +import torch +from torch.autograd import Variable +from sklearn.metrics import confusion_matrix +from torch.nn import functional as F + +from opts import parse_opts_online +from model import generate_model +from mean import get_mean, get_std +from spatial_transforms import * +from temporal_transforms import * +from target_transforms import ClassLabel +from dataset import get_online_data +from utils import Logger, AverageMeter, LevenshteinDistance, Queue + +import pdb +import numpy as np + +import matplotlib.pyplot as plt +from matplotlib.pyplot import figure + + +def weighting_func(x): + return (1 / (1 + np.exp(-0.2*(x-9)))) + + +opt = parse_opts_online() + +def load_models(opt): + opt.resume_path = opt.resume_path_det + opt.pretrain_path = opt.pretrain_path_det + opt.sample_duration = opt.sample_duration_det + opt.model = opt.model_det + opt.model_depth = opt.model_depth_det + opt.modality = opt.modality_det + opt.resnet_shortcut = opt.resnet_shortcut_det + opt.n_classes = opt.n_classes_det + opt.n_finetune_classes = opt.n_finetune_classes_det + + if opt.root_path != '': + opt.video_path = os.path.join(opt.root_path, opt.video_path) + opt.annotation_path = os.path.join(opt.root_path, opt.annotation_path) + opt.result_path = os.path.join(opt.root_path, opt.result_path) + if opt.resume_path: + opt.resume_path = os.path.join(opt.root_path, opt.resume_path) + if opt.pretrain_path: + opt.pretrain_path = os.path.join(opt.root_path, opt.pretrain_path) + + + + + opt.scales = [opt.initial_scale] + for i in range(1, opt.n_scales): + opt.scales.append(opt.scales[-1] * opt.scale_step) + opt.arch = '{}-{}'.format(opt.model, opt.model_depth) + opt.mean = get_mean(opt.norm_value) + opt.std = get_std(opt.norm_value) + + print(opt) + with open(os.path.join(opt.result_path, 'opts_det.json'), 'w') as opt_file: + json.dump(vars(opt), opt_file) + + torch.manual_seed(opt.manual_seed) + + detector, parameters = generate_model(opt) + + if opt.resume_path: + opt.resume_path = os.path.join(opt.root_path, opt.resume_path) + print('loading checkpoint {}'.format(opt.resume_path)) + checkpoint = torch.load(opt.resume_path) + assert opt.arch == checkpoint['arch'] + + detector.load_state_dict(checkpoint['state_dict']) + + print('Model 1 \n', detector) + pytorch_total_params = sum(p.numel() for p in detector.parameters() if + p.requires_grad) + print("Total number of trainable parameters: ", pytorch_total_params) + + + opt.resume_path = opt.resume_path_clf + opt.pretrain_path = opt.pretrain_path_clf + opt.sample_duration = opt.sample_duration_clf + opt.model = 
opt.model_clf + opt.model_depth = opt.model_depth_clf + opt.modality = opt.modality_clf + opt.resnet_shortcut = opt.resnet_shortcut_clf + opt.n_classes = opt.n_classes_clf + opt.n_finetune_classes = opt.n_finetune_classes_clf + if opt.root_path != '': + opt.video_path = os.path.join(opt.root_path, opt.video_path) + opt.annotation_path = os.path.join(opt.root_path, opt.annotation_path) + opt.result_path = os.path.join(opt.root_path, opt.result_path) + if opt.resume_path: + opt.resume_path = os.path.join(opt.root_path, opt.resume_path) + if opt.pretrain_path: + opt.pretrain_path = os.path.join(opt.root_path, opt.pretrain_path) + + opt.scales = [opt.initial_scale] + for i in range(1, opt.n_scales): + opt.scales.append(opt.scales[-1] * opt.scale_step) + opt.arch = '{}-{}'.format(opt.model, opt.model_depth) + opt.mean = get_mean(opt.norm_value) + opt.std = get_std(opt.norm_value) + + print(opt) + with open(os.path.join(opt.result_path, 'opts_clf.json'), 'w') as opt_file: + json.dump(vars(opt), opt_file) + + torch.manual_seed(opt.manual_seed) + classifier, parameters = generate_model(opt) + + if opt.resume_path: + print('loading checkpoint {}'.format(opt.resume_path)) + checkpoint = torch.load(opt.resume_path) + assert opt.arch == checkpoint['arch'] + + classifier.load_state_dict(checkpoint['state_dict']) + + print('Model 2 \n', classifier) + pytorch_total_params = sum(p.numel() for p in classifier.parameters() if + p.requires_grad) + print("Total number of trainable parameters: ", pytorch_total_params) + + return detector, classifier + +detector,classifier = load_models(opt) + +if opt.no_mean_norm and not opt.std_norm: + norm_method = Normalize([0, 0, 0], [1, 1, 1]) +elif not opt.std_norm: + norm_method = Normalize(opt.mean, [1, 1, 1]) +else: + norm_method = Normalize(opt.mean, opt.std) + + +spatial_transform = Compose([ + Scale(112), + CenterCrop(112), + ToTensor(opt.norm_value), norm_method + ]) + +target_transform = ClassLabel() + + + + +## Get list of videos to test +if opt.dataset == 'egogesture': + subject_list = ['Subject{:02d}'.format(i) for i in [2, 9, 11, 14, 18, 19, 28, 31, 41, 47]] + test_paths = [] + for subject in subject_list: + for x in glob.glob(os.path.join(opt.video_path,subject,'*/*/rgb*')): + test_paths.append(x) +elif opt.dataset == 'nv': + df = pd.read_csv(os.path.join(opt.video_path,'nvgesture_test_correct_cvpr2016_v2.lst'), delimiter = ' ', header = None) + test_paths = [] + for x in df[0].values: + test_paths.append(os.path.join(opt.video_path, x.replace('path:', ''), 'sk_color_all')) + + + +print('Start Evaluation') +detector.eval() +classifier.eval() + +levenshtein_accuracies = AverageMeter() +videoidx = 0 +for path in test_paths[4:]: + if opt.dataset == 'egogesture': + opt.whole_path = path.split(os.sep, 4)[-1] + elif opt.dataset == 'nv': + opt.whole_path = path.split(os.sep, 3)[-1] + + videoidx += 1 + active_index = 0 + passive_count = 0 + active = False + prev_active = False + finished_prediction = None + pre_predict = False + + cum_sum = np.zeros(opt.n_classes_clf,) + clf_selected_queue = np.zeros(opt.n_classes_clf,) + det_selected_queue = np.zeros(opt.n_classes_det,) + myqueue_det = Queue(opt.det_queue_size , n_classes = opt.n_classes_det) + myqueue_clf = Queue(opt.clf_queue_size, n_classes = opt.n_classes_clf ) + + + print('[{}/{}]============'.format(videoidx,len(test_paths))) + print(path) + opt.sample_duration = max(opt.sample_duration_clf, opt.sample_duration_det) + test_data = get_online_data( + opt, spatial_transform, None, target_transform) + + 
test_loader = torch.utils.data.DataLoader( + test_data, + batch_size=opt.batch_size, + shuffle=False, + num_workers=opt.n_threads, + pin_memory=True) + + + results = [] + prev_best1 = opt.n_classes_clf + + for i, (inputs, targets) in enumerate(test_loader): + if not opt.no_cuda: + targets = targets.cuda(async=True) + ground_truth_array = np.zeros(opt.n_classes_clf +1,) + with torch.no_grad(): + inputs = Variable(inputs) + targets = Variable(targets) + if opt.modality_det == 'RGB': + inputs_det = inputs[:,:-1,-opt.sample_duration_det:,:,:] + elif opt.modality_det == 'Depth': + inputs_det = inputs[:,-1,-opt.sample_duration_det:,:,:].unsqueeze(1) + elif opt.modality_det =='RGB-D': + inputs_det = inputs[:,:,-opt.sample_duration_det:,:,:] + + outputs_det = detector(inputs_det) + outputs_det = F.softmax(outputs_det,dim=1) + outputs_det = outputs_det.cpu().numpy()[0].reshape(-1,) + + # enqueue the probabilities to the detector queue + myqueue_det.enqueue(outputs_det.tolist()) + + if opt.det_strategy == 'raw': + det_selected_queue = outputs_det + elif opt.det_strategy == 'median': + det_selected_queue = myqueue_det.median + elif opt.det_strategy == 'ma': + det_selected_queue = myqueue_det.ma + elif opt.det_strategy == 'ewma': + det_selected_queue = myqueue_det.ewma + + + prediction_det = np.argmax(det_selected_queue) + prob_det = det_selected_queue[prediction_det] + + #### State of the detector is checked here as detector act as a switch for the classifier + if prediction_det == 1: + if opt.modality_clf == 'RGB': + inputs_clf = inputs[:,:-1,:,:,:] + elif opt.modality_clf == 'Depth': + inputs_clf = inputs[:,-1,:,:,:].unsqueeze(1) + elif opt.modality_clf =='RGB-D': + inputs_clf = inputs[:,:,:,:,:] + + outputs_clf = classifier(inputs_clf) + outputs_clf = F.softmax(outputs_clf,dim=1) + outputs_clf = outputs_clf.cpu().numpy()[0].reshape(-1,) + + # Push the probabilities to queue + myqueue_clf.enqueue(outputs_clf.tolist()) + passive_count = 0 + + if opt.clf_strategy == 'raw': + clf_selected_queue = outputs_clf + elif opt.clf_strategy == 'median': + clf_selected_queue = myqueue_clf.median + elif opt.clf_strategy == 'ma': + clf_selected_queue = myqueue_clf.ma + elif opt.clf_strategy == 'ewma': + clf_selected_queue = myqueue_clf.ewma + + else: + outputs_clf = np.zeros(opt.n_classes_clf ,) + # Push the probabilities to queue + myqueue_clf.enqueue(outputs_clf.tolist()) + passive_count += 1 + + + + if passive_count >= opt.det_counter: + active = False + else: + active = True + + # one of the following line need to be commented !!!! 
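+                # Weighted approach below: each new clf_selected_queue sample is scaled by
+                # weighting_func(active_index) = 1 / (1 + exp(-0.2*(active_index - 9))) before
+                # being folded into the running mean, so scores from the first frames of an
+                # activation contribute less than later, more confident ones.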
+ if active: + active_index += 1 + cum_sum = ((cum_sum * (active_index-1)) + (weighting_func(active_index) * clf_selected_queue))/active_index # Weighted Aproach + # cum_sum = ((cum_sum * (x-1)) + (1.0 * clf_selected_queue))/x #Not Weighting Aproach + + best2, best1 = tuple(cum_sum.argsort()[-2:][::1]) + if float(cum_sum[best1]- cum_sum[best2]) > opt.clf_threshold_pre: + finished_prediction = True + pre_predict = True + + else: + active_index = 0 + + + if active == False and prev_active == True: + finished_prediction = True + elif active == True and prev_active == False: + finished_prediction = False + + + + if finished_prediction == True: + best2, best1 = tuple(cum_sum.argsort()[-2:][::1]) + if cum_sum[best1]>opt.clf_threshold_final: + if pre_predict == True: + if best1 != prev_best1: + if cum_sum[best1]>opt.clf_threshold_final: + results.append(((i*opt.stride_len)+opt.sample_duration_clf,best1)) + print( 'Early Detected - class : {} with prob : {} at frame {}'.format(best1, cum_sum[best1], (i*opt.stride_len)+opt.sample_duration_clf)) + else: + if cum_sum[best1]>opt.clf_threshold_final: + if best1 == prev_best1: + if cum_sum[best1]>5: + results.append(((i*opt.stride_len)+opt.sample_duration_clf,best1)) + print( 'Late Detected - class : {} with prob : {} at frame {}'.format(best1, cum_sum[best1], (i*opt.stride_len)+opt.sample_duration_clf)) + else: + results.append(((i*opt.stride_len)+opt.sample_duration_clf,best1)) + + print( 'Late Detected - class : {} with prob : {} at frame {}'.format(best1, cum_sum[best1], (i*opt.stride_len)+opt.sample_duration_clf)) + + finished_prediction = False + prev_best1 = best1 + + cum_sum = np.zeros(opt.n_classes_clf,) + + if active == False and prev_active == True: + pre_predict = False + + prev_active = active + + if opt.dataset == 'egogesture': + target_csv_path = os.path.join(opt.video_path.rsplit(os.sep, 1)[0], + 'labels-final-revised1', + opt.whole_path.rsplit(os.sep,2)[0], + 'Group'+opt.whole_path[-1] + '.csv').replace('Subject', 'subject') + true_classes = [] + with open(target_csv_path) as csvfile: + readCSV = csv.reader(csvfile, delimiter=',') + for row in readCSV: + true_classes.append(int(row[0])-1) + elif opt.dataset == 'nv': + true_classes = [] + with open('./annotation_nvGesture/vallistall.txt') as csvfile: + readCSV = csv.reader(csvfile, delimiter=' ') + for row in readCSV: + if row[0] == opt.whole_path: + if row[1] != '26' : + true_classes.append(int(row[1])-1) + + predicted = np.array(results)[:,1] + + true_classes = np.array(true_classes) + levenshtein_distance = LevenshteinDistance(true_classes, predicted) + levenshtein_accuracy = 1-(levenshtein_distance/len(true_classes)) + if levenshtein_distance <0: # Distance cannot be less than 0 + levenshtein_accuracies.update(0, len(true_classes)) + else: + levenshtein_accuracies.update(levenshtein_accuracy, len(true_classes)) + + + print('predicted classes: \t',predicted) + print('True classes :\t\t',true_classes) + print('Levenshtein Accuracy = {} ({})'.format(levenshtein_accuracies.val, levenshtein_accuracies.avg)) + +print('Average Levenshtein Accuracy= {}'.format(levenshtein_accuracies.avg)) + +print('-----Evaluation is finished------') \ No newline at end of file diff --git a/opts.py b/opts.py index 8805a28..c39d7ab 100644 --- a/opts.py +++ b/opts.py @@ -287,6 +287,7 @@ def parse_opts_online(): parser.add_argument('--annotation_path', default='kinetics.json', type=str, help='Annotation file path') parser.add_argument('--result_path', default='results', type=str, help='Result directory path') 
parser.add_argument('--store_name', default='model', type=str, help='Name to store checkpoints') + parser.add_argument('--modality', default='RGB', type=str, help='Modality of input data. RGB, Flow or RGBFlow') parser.add_argument('--modality_det', default='RGB', type=str, help='Modality of input data. RGB, Flow or RGBFlow') parser.add_argument('--modality_clf', default='RGB', type=str, help='Modality of input data. RGB, Flow or RGBFlow') parser.add_argument('--dataset', default='kinetics', type=str, help='Used dataset (activitynet | kinetics | ucf101 | hmdb51)') @@ -294,53 +295,51 @@ def parse_opts_online(): parser.add_argument('--n_finetune_classes_det', default=400, type=int, help='Number of classes for fine-tuning. n_classes is set to the number when pretraining.') parser.add_argument('--n_classes_clf', default=400, type=int, help='Number of classes (activitynet: 200, kinetics: 400, ucf101: 101, hmdb51: 51)') parser.add_argument('--n_finetune_classes_clf', default=400, type=int, help='Number of classes for fine-tuning. n_classes is set to the number when pretraining.') + + parser.add_argument('--n_classes', default=400, type=int, help='Number of classes (activitynet: 200, kinetics: 400, ucf101: 101, hmdb51: 51)') + parser.add_argument('--n_finetune_classes', default=400, type=int, help='Number of classes for fine-tuning. n_classes is set to the number when pretraining.') parser.add_argument('--sample_size', default=112, type=int, help='Height and width of inputs') parser.add_argument('--sample_duration_det', default=16, type=int, help='Temporal duration of inputs') parser.add_argument('--sample_duration_clf', default=16, type=int, help='Temporal duration of inputs') - parser.add_argument( - '--n_val_samples', - default=3, - type=int, - help='Number of validation samples for each activity') + parser.add_argument('--sample_duration', default=16, type=int, help='Temporal duration of inputs') + parser.add_argument('--initial_scale', default=1.0, type=float, help='Initial scale for multiscale cropping') parser.add_argument('--n_scales', default=5, type=int, help='Number of scales for multiscale cropping') parser.add_argument('--scale_step', default=0.84089641525, type=float, help='Scale step for multiscale cropping') parser.add_argument('--train_crop', default='corner', type=str, help='Spatial cropping method in training. random is uniform. corner is selection from 4 corners and 1 center. 
(random | corner | center)') - # parser.add_argument('--learning_rate', default=0.1, type=float, help='Initial learning rate (divided by 10 while training by lr scheduler)') - # parser.add_argument('--lr_steps', default=[10, 20, 30, 40, 100], type=float, nargs="+", metavar='LRSteps', help='epochs to decay learning rate by 10') - # parser.add_argument('--momentum', default=0.9, type=float, help='Momentum') - # parser.add_argument('--dampening', default=0.9, type=float, help='dampening of SGD') - # parser.add_argument('--weight_decay', default=1e-3, type=float, help='Weight Decay') + parser.add_argument('--learning_rate', default=0.1, type=float, help='Initial learning rate (divided by 10 while training by lr scheduler)') + parser.add_argument('--lr_steps', default=[10, 20, 30, 40, 100], type=float, nargs="+", metavar='LRSteps', help='epochs to decay learning rate by 10') + parser.add_argument('--momentum', default=0.9, type=float, help='Momentum') + parser.add_argument('--dampening', default=0.9, type=float, help='dampening of SGD') + parser.add_argument('--weight_decay', default=1e-3, type=float, help='Weight Decay') parser.add_argument('--mean_dataset', default='activitynet', type=str, help='dataset for mean values of mean subtraction (activitynet | kinetics)') parser.add_argument('--no_mean_norm', action='store_true', help='If true, inputs are not normalized by mean.') parser.set_defaults(no_mean_norm=False) parser.add_argument('--std_norm', action='store_true', help='If true, inputs are normalized by standard deviation.') parser.set_defaults(std_norm=False) - # parser.add_argument('--nesterov', action='store_true', help='Nesterov momentum') - # parser.set_defaults(nesterov=False) - # parser.add_argument('--optimizer', default='sgd', type=str, help='Currently only support SGD') - # parser.add_argument('--lr_patience', default=10, type=int, help='Patience of LR scheduler. See documentation of ReduceLROnPlateau.') + parser.add_argument('--nesterov', action='store_true', help='Nesterov momentum') + parser.set_defaults(nesterov=False) + parser.add_argument('--optimizer', default='sgd', type=str, help='Currently only support SGD') + parser.add_argument('--lr_patience', default=10, type=int, help='Patience of LR scheduler. See documentation of ReduceLROnPlateau.') parser.add_argument('--batch_size', default=128, type=int, help='Batch Size') - # parser.add_argument('--n_epochs', default=200, type=int, help='Number of total epochs to run') - # parser.add_argument('--begin_epoch', default=1, type=int, help='Training begins at this epoch. Previous trained model indicated by resume_path is loaded.') - # parser.add_argument('--n_val_samples', default=3, type=int, help='Number of validation samples for each activity') + parser.add_argument('--n_epochs', default=200, type=int, help='Number of total epochs to run') + parser.add_argument('--begin_epoch', default=1, type=int, help='Training begins at this epoch. 
Previous trained model indicated by resume_path is loaded.') + parser.add_argument('--n_val_samples', default=3, type=int, help='Number of validation samples for each activity') parser.add_argument('--resume_path_det', default='', type=str, help='Save data (.pth) of previous training') parser.add_argument('--resume_path_clf', default='', type=str, help='Save data (.pth) of previous training') - # parser.add_argument('--pretrain_path_det', default='', type=str, help='Pretrained model (.pth)') - # parser.add_argument('--pretrain_path_clf', default='', type=str, help='Pretrained model (.pth)') - parser.add_argument( - '--pretrain_path', - default='', - type=str, - help='Pretrained model (.pth)') + parser.add_argument('--resume_path', default='', type=str, help='Save data (.pth) of previous training') + parser.add_argument('--pretrain_path_det', default='', type=str, help='Pretrained model (.pth)') + parser.add_argument('--pretrain_path_clf', default='', type=str, help='Pretrained model (.pth)') + parser.add_argument('--pretrain_path', default='', type=str, help='Pretrained model (.pth)') + parser.add_argument('--ft_begin_index', default=0, type=int, help='Begin block index of fine-tuning') - # parser.add_argument('--no_train', action='store_true', help='If true, training is not performed.') - # parser.set_defaults(no_train=False) - # parser.add_argument('--no_val', action='store_true', help='If true, validation is not performed.') - # parser.set_defaults(no_val=False) - # parser.add_argument('--test', action='store_true', help='If true, test is performed.') - # parser.set_defaults(test=True) + parser.add_argument('--no_train', action='store_true', help='If true, training is not performed.') + parser.set_defaults(no_train=False) + parser.add_argument('--no_val', action='store_true', help='If true, validation is not performed.') + parser.set_defaults(no_val=False) + parser.add_argument('--test', action='store_true', help='If true, test is performed.') + parser.set_defaults(test=True) parser.add_argument('--test_subset', default='val', type=str, help='Used subset in test (val | test)') parser.add_argument('--scale_in_test', default=1.0, type=float, help='Spatial scale in test') parser.add_argument('--crop_position_in_test', default='c', type=str, help='Cropping method (c | tl | tr | bl | br) in test') @@ -349,16 +348,23 @@ def parse_opts_online(): parser.add_argument('--no_cuda', action='store_true', help='If true, cuda is not used.') parser.set_defaults(no_cuda=False) parser.add_argument('--n_threads', default=4, type=int, help='Number of threads for multi-thread loading') - # parser.add_argument('--checkpoint', default=10, type=int, help='Trained model is saved at every this epochs.') - # parser.add_argument('--no_hflip', action='store_true', help='If true holizontal flipping is not performed.') - # parser.set_defaults(no_hflip=False) + parser.add_argument('--checkpoint', default=10, type=int, help='Trained model is saved at every this epochs.') + parser.add_argument('--no_hflip', action='store_true', help='If true holizontal flipping is not performed.') + parser.set_defaults(no_hflip=False) parser.add_argument('--norm_value', default=1, type=int, help='If 1, range of inputs is [0-255]. 
If 255, range of inputs is [0-1].') + parser.add_argument('--model_det', default='resnet', type=str, help='(resnet | preresnet | wideresnet | resnext | densenet | ') parser.add_argument('--model_depth_det', default=18, type=int, help='Depth of resnet (10 | 18 | 34 | 50 | 101)') parser.add_argument('--resnet_shortcut_det', default='B', type=str, help='Shortcut type of resnet (A | B)') parser.add_argument('--wide_resnet_k_det', default=2, type=int, help='Wide resnet k') parser.add_argument('--resnext_cardinality_det', default=32, type=int, help='ResNeXt cardinality') + parser.add_argument('--model', default='resnet', type=str, help='(resnet | preresnet | wideresnet | resnext | densenet | ') + parser.add_argument('--model_depth', default=18, type=int, help='Depth of resnet (10 | 18 | 34 | 50 | 101)') + parser.add_argument('--resnet_shortcut', default='B', type=str, help='Shortcut type of resnet (A | B)') + parser.add_argument('--wide_resnet_k', default=2, type=int, help='Wide resnet k') + parser.add_argument('--resnext_cardinality', default=32, type=int, help='ResNeXt cardinality') + parser.add_argument('--model_clf', default='resnet', type=str, help='(resnet | preresnet | wideresnet | resnext | densenet | ') parser.add_argument('--model_depth_clf', default=18, type=int, help='Depth of resnet (10 | 18 | 34 | 50 | 101)') parser.add_argument('--resnet_shortcut_clf', default='B', type=str, help='Shortcut type of resnet (A | B)') @@ -366,15 +372,13 @@ def parse_opts_online(): parser.add_argument('--resnext_cardinality_clf', default=32, type=int, help='ResNeXt cardinality') parser.add_argument('--manual_seed', default=1, type=int, help='Manually set random seed') - - parser.add_argument('--det_strategy', default='raw', type=str, help='Detector filter (raw | median | ma | ewma)') parser.add_argument('--det_queue_size', default=1, type=int, help='Detector queue size') - parser.add_argument('--det_threshold', default=1, type=int, help='Number of consequtive detection') + parser.add_argument('--det_counter', default=1, type=float, help='Number of consequtive detection') parser.add_argument('--clf_strategy', default='raw', type=str, help='Classifier filter (raw | median | ma | ewma)') parser.add_argument('--clf_queue_size', default=1, type=int, help='Classifier queue size') - parser.add_argument('--clf_threshold_pre', default=1, type=int, help='Cumulative sum threshold to prepredict') - parser.add_argument('--clf_threshold_final', default=1, type=int, help='Cumulative sum threshold to predict at the end') + parser.add_argument('--clf_threshold_pre', default=1, type=float, help='Cumulative sum threshold to prepredict') + parser.add_argument('--clf_threshold_final', default=1, type=float, help='Cumulative sum threshold to predict at the end') parser.add_argument('--stride_len', default=1, type=int, help='Stride Lenght of video loader window') args = parser.parse_args() diff --git a/real_time_test.py b/real_time_test.py deleted file mode 100644 index 4badd1f..0000000 --- a/real_time_test.py +++ /dev/null @@ -1,594 +0,0 @@ -import argparse -import time -import os -import glob -import sys -import json -import shutil -import itertools -import numpy as np -import pandas as pd -import csv -import torch -import tensorflow as tf -from torch.autograd import Variable -from sklearn.metrics import confusion_matrix -from torch.nn import functional as F - -from opts import parse_opts_online -from model import generate_model -from mean import get_mean, get_std -from spatial_transforms import * -from temporal_transforms 
import * -from target_transforms import ClassLabel, VideoID -from target_transforms import Compose as TargetCompose -from dataset import get_online_data ,get_training_set -from utils import Logger, AverageMeter -from train import train_epoch -from validation import val_epoch -import test - -import pdb - -import matplotlib.pyplot as plt -from matplotlib.pyplot import figure -# figure(num=None, figsize=(9, 3), dpi=180, facecolor='w', edgecolor='k') - -import cv2 -import PIL -from PIL import ImageFont, ImageDraw, Image - -# Reshape a numpy array 'a' of shape (n, x) to form shape((n - window_size), window_size, x)) -def rolling_window(a, window, step_size): - a = a.transpose() - shape = a.shape[:-1] + (a.shape[-1] - window + 1 - step_size, window) - strides = a.strides + (a.strides[-1] * step_size,) - return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides) - - -def weighting_func(x): - return (1 / (1 + np.exp(-0.2*(x-9)))) - - -def levenshtein(a,b): - # This is a straightforward implementation of a well-known algorithm, and thus - # probably shouldn't be covered by copyright to begin with. But in case it is, - # the author (Magnus Lie Hetland) has, to the extent possible under law, - # dedicated all copyright and related and neighboring rights to this software - # to the public domain worldwide, by distributing it under the CC0 license, - # version 1.0. This software is distributed without any warranty. For more - # information, see - "Calculates the Levenshtein distance between a and b." - n, m = len(a), len(b) - if n > m: - # Make sure n <= m, to use O(min(n,m)) space - a,b = b,a - n,m = m,n - - current = range(n+1) - for i in range(1,m+1): - previous, current = current, [i]+[0]*n - for j in range(1,n+1): - add, delete = previous[j]+1, current[j-1]+1 - change = previous[j-1] - if a[j-1] != b[i-1]: - change = change + 1 - current[j] = min(add, delete, change) - - return current[n] - - - -class Queue: - #Constructor creates a list - def __init__(self, max_size, n_classes): - self.queue = list(np.zeros((max_size, n_classes),dtype = float).tolist()) - self.max_size = max_size - #Adding elements to queue - def enqueue(self,data): - self.queue.insert(0,data) - return True - - #Removing the last element from the queue - def dequeue(self): - if len(self.queue)>0: - return self.queue.pop() - return ("Queue Empty!") - - #Getting the size of the queue - def size(self): - return len(self.queue) - - #printing the elements of the queue - def printQueue(self): - return self.queue - - #Average - def ma(self): - return np.array(self.queue[:self.max_size]).mean(axis = 0) - - #Median - def median(self): - return np.median(np.array(self.queue[:self.max_size]), axis = 0) - - #Exponential average - def ewma(self): - weights = np.exp(np.linspace(-1., 0., self.max_size)) - weights /= weights.sum() - average = weights.reshape(1,self.max_size).dot( np.array(self.queue[:self.max_size])) - return average.reshape(average.shape[1],) - - -opt = parse_opts_online() - -def load_models(opt): - opt.resume_path = opt.resume_path_det - opt.sample_duration = opt.sample_duration_det - opt.model = opt.model_det - opt.model_depth = opt.model_depth_det - opt.modality = opt.modality_det - opt.resnet_shortcut = opt.resnet_shortcut_det - opt.resnext_cardinality = opt.resnext_cardinality_det - opt.n_classes = opt.n_classes_det - opt.n_finetune_classes = opt.n_finetune_classes_det - - if opt.root_path != '': - opt.video_path = os.path.join(opt.root_path, opt.video_path) - opt.annotation_path = os.path.join(opt.root_path, 
opt.annotation_path) - opt.result_path = os.path.join(opt.root_path, opt.result_path) - if opt.resume_path: - opt.resume_path = os.path.join(opt.root_path, opt.resume_path) - - - - - opt.scales = [opt.initial_scale] - for i in range(1, opt.n_scales): - opt.scales.append(opt.scales[-1] * opt.scale_step) - opt.arch = '{}-{}'.format(opt.model, opt.model_depth) - opt.mean = get_mean(opt.norm_value) - opt.std = get_std(opt.norm_value) - - print(opt) - with open(os.path.join(opt.result_path, 'opts.json'), 'w') as opt_file: - json.dump(vars(opt), opt_file) - - torch.manual_seed(opt.manual_seed) - - detector, parameters = generate_model(opt) - - if opt.resume_path: - opt.resume_path = os.path.join(opt.root_path, opt.resume_path) - print('loading checkpoint {}'.format(opt.resume_path)) - checkpoint = torch.load(opt.resume_path) - assert opt.arch == checkpoint['arch'] - - detector.load_state_dict(checkpoint['state_dict']) - - print('Detector \n', detector) - pytorch_total_params = sum(p.numel() for p in detector.parameters() if - p.requires_grad) - print("Total number of trainable parameters: ", pytorch_total_params) - - - opt.resume_path = opt.resume_path_clf - opt.sample_duration = opt.sample_duration_clf - opt.model = opt.model_clf - opt.model_depth = opt.model_depth_clf - opt.modality = opt.modality_clf - opt.resnet_shortcut = opt.resnet_shortcut_clf - opt.n_classes = opt.n_classes_clf - opt.n_finetune_classes = opt.n_finetune_classes_clf - if opt.root_path != '': - opt.video_path = os.path.join(opt.root_path, opt.video_path) - opt.annotation_path = os.path.join(opt.root_path, opt.annotation_path) - opt.result_path = os.path.join(opt.root_path, opt.result_path) - if opt.resume_path: - opt.resume_path = os.path.join(opt.root_path, opt.resume_path) - - opt.scales = [opt.initial_scale] - for i in range(1, opt.n_scales): - opt.scales.append(opt.scales[-1] * opt.scale_step) - opt.arch = '{}-{}'.format(opt.model, opt.model_depth) - opt.mean = get_mean(opt.norm_value) - opt.std = get_std(opt.norm_value) - - print(opt) - with open(os.path.join(opt.result_path, 'opts.json'), 'w') as opt_file: - json.dump(vars(opt), opt_file) - - torch.manual_seed(opt.manual_seed) - classifier, parameters = generate_model(opt) - - if opt.resume_path: - print('loading checkpoint {}'.format(opt.resume_path)) - checkpoint = torch.load(opt.resume_path) - assert opt.arch == checkpoint['arch'] - - classifier.load_state_dict(checkpoint['state_dict']) - - print('Classifier \n', classifier) - pytorch_total_params = sum(p.numel() for p in classifier.parameters() if - p.requires_grad) - print("Total number of trainable parameters: ", pytorch_total_params) - - return detector, classifier - -detector,classifier = load_models(opt) - -if opt.no_mean_norm and not opt.std_norm: - norm_method = Normalize([0, 0, 0], [1, 1, 1]) -elif not opt.std_norm: - norm_method = Normalize(opt.mean, [1, 1, 1]) -else: - norm_method = Normalize(opt.mean, opt.std) - - -spatial_transform = Compose([ - Scale(112), - CenterCrop(112), - ToTensor(opt.norm_value), norm_method - ]) - -target_transform = ClassLabel() - -print('run') - -det_strategy_list = ['median'] -det_queue_size_list = [4] -det_threshold_list = [2] -clf_strategy_list = [ 'median'] -clf_queue_size_list = [16] -clf_threshold_pre_list = [ 0.9, 1.0] -clf_threshold_final_list = [0.15] - -combinations_list = [det_strategy_list,\ - det_queue_size_list,\ - det_threshold_list,\ - clf_strategy_list,\ - clf_queue_size_list, \ - clf_threshold_pre_list, \ - clf_threshold_final_list] -combinations_list = 
list(itertools.product(*combinations_list)) - -## To get list of videos -if opt.dataset == 'egogesture': - subject_list = ['Subject{:02d}'.format(i) for i in [2, 9, 11, 14, 18, 19, 28, 31, 41, 47]] - test_paths = [] - for subject in subject_list: - for x in glob.glob(os.path.join(opt.video_path,subject,'*/*/rgb*')): - test_paths.append(x) -elif opt.dataset == 'nv': - df = pd.read_csv(os.path.join(opt.video_path,'nvgesture_test_correct_cvpr2016_v2.lst'), delimiter = ' ', header = None) - test_paths = [] - for x in df[0].values: - test_paths.append(os.path.join(opt.video_path, x.replace('path:', ''), 'sk_color_all')) - - -for comb in combinations_list: - - opt.det_strategy = comb[0] - opt.det_queue_size = comb[1] - opt.det_threshold = comb[2] - opt.clf_strategy = comb[3] - opt.clf_queue_size = comb[4] - opt.clf_threshold_pre = comb[5] - opt.clf_threshold_final = comb[6] - - detector.eval() - classifier.eval() - - levenstein_accuracies = AverageMeter() - videoidx = 0 - for path in test_paths[4:]: - if opt.dataset == 'egogesture': - opt.whole_path = path.split(os.sep, 4)[-1] - elif opt.dataset == 'nv': - opt.whole_path = path.split(os.sep, 3)[-1] - - new_row = [] - new_row_result = [] - videoidx += 1 - # Initialize the buffer for the logits - recorderma_det = [] - recorderewma_det = [] - recorderraw_det = [] - recordermedian_det = [] - recorderma_clf = [] - recorderewma_clf = [] - recorderraw_clf = [] - recordermedian_clf = [] - recordercumsum = [] - recordermaplot = [] - recordermedianplot = [] - recorderewmaplot = [] - recorder_ground_truth = [] - recorder_state = [] - recorderindexcumsum = [] - - - x = 0 - active_count = 0 - passive_count = 0 - active = False - prev_active = False - finished_prediction = None - started_prediction = None - pre_predict = False - cum_sum = np.zeros(opt.n_classes_clf,) - clf_selected_queue = np.zeros(opt.n_classes_clf,) - det_selected_queue = np.zeros(opt.n_classes_det,) - myqueue_det = Queue(opt.det_queue_size , n_classes = opt.n_classes_det) - myqueue_clf = Queue(opt.clf_queue_size, n_classes = opt.n_classes_clf ) - - - print('[{}/{}]----------------'.format(videoidx,len(test_paths))) - print(path) - test_data = get_online_data( - opt, spatial_transform, None, target_transform) - - test_loader = torch.utils.data.DataLoader( - test_data, - batch_size=opt.batch_size, - shuffle=False, - num_workers=opt.n_threads, - pin_memory=True) - - - results = [] - prev_best1 = opt.n_classes_clf - - new_row.append(path) - start_frame = int(16) - new_row.append(start_frame) - # max_i = len(list(enumerate(test_loader))) -1 - for i, (inputs, targets) in enumerate(test_loader): - if (i %100) == 0 : - print(i) - if not opt.no_cuda: - targets = targets.cuda(async=True) - ground_truth_array = np.zeros(opt.n_classes_clf +1,) - with torch.no_grad(): - inputs = Variable(inputs) - targets = Variable(targets) - if opt.modality_det == 'RGB': - inputs_det = inputs[:,:-1,-opt.sample_duration_det:,:,:] - elif opt.modality_det == 'Depth': - inputs_det = inputs[:,-1,-opt.sample_duration_det:,:,:].unsqueeze(1) - elif opt.modality_det =='RGB-D': - inputs_det = inputs[:,:,-opt.sample_duration_det:,:,:] - - outputs_det = detector(inputs_det) - outputs_det = F.softmax(outputs_det,dim=1) - outputs_det = outputs_det.cpu().numpy()[0].reshape(-1,) - - # enqueue the probabilities to the detector queue - myqueue_det.enqueue(outputs_det.tolist()) - - # Calculate moving averages - moving_average_det = myqueue_det.ma() - emoving_average_det = myqueue_det.ewma() - moving_median_det = myqueue_det.median() - 
- if opt.det_strategy == 'raw': - det_selected_queue = outputs_det - elif opt.det_strategy == 'median': - det_selected_queue = moving_median_det - elif opt.det_strategy == 'ma': - det_selected_queue = moving_average_det - elif opt.det_strategy == 'ewma': - det_selected_queue = emoving_average_det - - - prediction_det = np.argmax(det_selected_queue) - prob1 = det_selected_queue[prediction_det] - - if prediction_det == 1: - - if opt.modality_clf == 'RGB': - inputs_clf = inputs[:,:-1,:,:,:] - elif opt.modality_clf == 'Depth': - inputs_clf = inputs[:,-1,:,:,:].unsqueeze(1) - elif opt.modality_clf =='RGB-D': - inputs_clf = inputs[:,:,:,:,:] - - outputs_clf = classifier(inputs_clf) - - outputs_clf = F.softmax(outputs_clf,dim=1) - outputs_clf = outputs_clf.cpu().numpy()[0].reshape(-1,) - - - # Push the probabilities to queue - myqueue_clf.enqueue(outputs_clf.tolist()) - - # Calcualte moving averages - moving_average_clf = myqueue_clf.ma() - emoving_average_clf = myqueue_clf.ewma() - moving_median_clf = myqueue_clf.median() - - recordermaplot.append(moving_average_clf) - recordermedianplot.append(moving_median_clf) - recorderewmaplot.append(emoving_average_clf) - passive_count = 0 - active_count += 1 - - if opt.clf_strategy == 'raw': - clf_selected_queue = outputs_clf - elif opt.clf_strategy == 'median': - clf_selected_queue = moving_median_clf - elif opt.clf_strategy == 'ma': - clf_selected_queue = moving_average_clf - elif opt.clf_strategy == 'ewma': - clf_selected_queue = emoving_average_clf - - - best1 = np.argmax(clf_selected_queue) - prob = clf_selected_queue[best1] - - else: - outputs_clf = np.zeros(opt.n_classes_clf ,) - # Push the probabilities to queue - myqueue_clf.enqueue(outputs_clf.tolist()) - # Calcualte moving averages - moving_average_clf = myqueue_clf.ma() - emoving_average_clf = myqueue_clf.ewma() - moving_median_clf = myqueue_clf.median() - - prob = prob1 - best1 = opt.n_classes_clf - recordermaplot.append(np.zeros(opt.n_classes_clf ,)) - recordermedianplot.append(np.zeros(opt.n_classes_clf ,)) - recorderewmaplot.append(np.zeros(opt.n_classes_clf ,)) - - passive_count += 1 - active_count = 0 - - - #pdb.set_trace() - if passive_count >= opt.det_threshold: - active = False - else: - active = True - - - - if active: - recorder_state.append(1) - x += 1 - cum_sum = ((cum_sum * (x-1)) + (weighting_func(x) * clf_selected_queue))/x - #cum_sum = ((cum_sum * (x-1)) + (1.0 * clf_selected_queue))/x - - best2, best1 = tuple(cum_sum.argsort()[-2:][::1]) - if float(cum_sum[best1]- cum_sum[best2]) > opt.clf_threshold_pre: - finished_prediction = True - pre_predict = True - - else: - x = 0 - - - if active == False and prev_active == True: - finished_prediction = True - started_prediction = False - elif active == True and prev_active == False: - started_prediction = True - finished_prediction = False - - - - if finished_prediction == True: - best2, best1 = tuple(cum_sum.argsort()[-2:][::1]) - - - if cum_sum[best1]>opt.clf_threshold_final: - if pre_predict == True: - if best1 != prev_best1: - if cum_sum[best1]>opt.clf_threshold_final: - results.append(((i*opt.stride_len)+opt.sample_duration2,best1)) - print( '1 -- candidate : {} , prob : {} at frame {}'.format(best1, cum_sum[best1], (i*opt.stride_len)+opt.sample_duration2)) - else: - if cum_sum[best1]>opt.clf_threshold_final: - if best1 == prev_best1: - if cum_sum[best1]>5: - results.append(((i*opt.stride_len)+opt.sample_duration2,best1)) - print( '2 --candidate : {} , prob : {} at frame {}'.format(best1, cum_sum[best1], 
(i*opt.stride_len)+opt.sample_duration2)) - else: - results.append(((i*opt.stride_len)+opt.sample_duration2,best1)) - - print( '2 --candidate : {} , prob : {} at frame {}'.format(best1, cum_sum[best1], (i*opt.stride_len)+opt.sample_duration2)) - - - - finished_prediction = False - prev_best1 = best1 - - cum_sum = np.zeros(opt.n_classes_clf,) - - if active == False and prev_active == True: - pre_predict = False - - prev_active = active - - ground_truth_array[targets.item()] = 1.0 - recorder_ground_truth.append(ground_truth_array) - # pdb.set_trace() - # Append moving averages - recorderma_det.append(moving_average_det) - recorderewma_det.append(emoving_average_det) - recorderraw_det.append(outputs_det) - recordermedian_det.append(moving_median_det) - recordercumsum.append(cum_sum) - recorderindexcumsum.append(x) - recorderma_clf.append(moving_average_clf) - recorderewma_clf.append(emoving_average_clf) - recorderraw_clf.append(outputs_clf) - recordermedian_clf.append(moving_median_clf) - - - - ## Print outputs for the video - recordermedianplot = np.array(recordermedianplot) - recorderewmaplot = np.array(recorderewmaplot) - recordermaplot = np.array(recordermaplot) - recorderraw_det = np.array(recorderraw_det) - recordermedian_det = np.array(recordermedian_det) - recorderma_det = np.array(recorderma_det) - recorderraw_clf = np.array(recorderraw_clf) - recorderma_clf = np.array(recorderma_clf) - recordercumsum = np.array(recordercumsum) - recorderewma_clf = np.array(recorderewma_clf) - recordermedian_clf = np.array(recordermedian_clf) - recorder_ground_truth = np.array(recorder_ground_truth) - recorderall = np.concatenate([recorderraw_clf, recorderraw_det[:,1].reshape(recorderraw_det.shape[0],1)], axis = 1) - - if opt.dataset == 'egogesture': - target_csv_path = os.path.join(opt.video_path.rsplit(os.sep, 1)[0], - 'labels-final-revised1', - opt.whole_path.rsplit(os.sep,2)[0], - 'Group'+opt.whole_path[-1] + '.csv').replace('Subject', 'subject') - target_list = [] - with open(target_csv_path) as csvfile: - readCSV = csv.reader(csvfile, delimiter=',') - for row in readCSV: - target_list.append(int(row[0])-1) - elif opt.dataset == 'nv': - target_list = [] - with open('./annotation_nvGesture/vallistall.txt') as csvfile: - readCSV = csv.reader(csvfile, delimiter=' ') - for row in readCSV: - if row[0] == opt.whole_path: - if row[1] != '26' : - target_list.append(int(row[1])-1) - try: - result_list = np.array(results)[:,1] - except: - result_list = np.array([]) - - target_list = np.array(target_list) - distance = levenshtein(target_list, result_list) - - if (1-(distance/len(target_list))) <0: - levenstein_accuracies.update(0, len(target_list)) - else: - levenstein_accuracies.update(1-(distance/len(target_list)), len(target_list)) - - - print('results :',result_list) - print('targets :',target_list) - print('Accuracy = {} ({})'.format(levenstein_accuracies.val, levenstein_accuracies.avg)) - plt.plot(recorderraw_clf) - plt.show() - - - pdb.set_trace() - - - - -print('-----Evaluation is finished------') - # print('Overall Prec@1 {:.05f}% Prec@5 {:.05f}%'.format(top1.avg, top5. 
- - # plt.plot(recorderewma_clf) - # plt.show() - # plt.plot( pd.ewma(recorderraw_clf,com = 0.5, min_periods = 1)) # Exponential weigted moving average - # plt.plot( pd.rolling_mean(recorderraw_clf, 5, min_periods = 1)) # Moving average diff --git a/results/opts.json b/results/opts.json index 5246e98..a27c387 100644 --- a/results/opts.json +++ b/results/opts.json @@ -1 +1 @@ -{"root_path": "/usr/home/kop/", "video_path": "/data2/EgoGesture/images", "annotation_path": "/usr/home/kop/Real-time-GesRec/annotation_EgoGesture/egogestureall_but_None.json", "result_path": "/usr/home/kop/Real-time-GesRec/results", "store_name": "model", "modality": "RGB-D", "dataset": "egogesture", "n_classes": 83, "n_finetune_classes": 83, "sample_size": 112, "sample_duration": 32, "initial_scale": 1.0, "n_scales": 5, "scale_step": 0.84089641525, "train_crop": "random", "learning_rate": 0.01, "lr_steps": [10, 25, 50, 80, 100], "momentum": 0.9, "dampening": 0.9, "weight_decay": 0.001, "mean_dataset": "activitynet", "no_mean_norm": false, "std_norm": false, "nesterov": false, "optimizer": "sgd", "lr_patience": 10, "batch_size": 8, "n_epochs": 100, "begin_epoch": 1, "n_val_samples": 1, "resume_path": "", "pretrain_path": "", "ft_begin_index": 0, "no_train": false, "no_val": false, "test": false, "test_subset": "test", "train_validate": false, "scale_in_test": 1.0, "crop_position_in_test": "c", "no_softmax_in_test": false, "no_cuda": false, "n_threads": 16, "checkpoint": 1, "no_hflip": false, "norm_value": 1, "model": "c3d", "model_depth": 10, "resnet_shortcut": "B", "wide_resnet_k": 2, "resnext_cardinality": 32, "manual_seed": 1, "weighted": false, "scales": [1.0, 0.84089641525, 0.7071067811803005, 0.5946035574934808, 0.4999999999911653], "arch": "c3d-10", "mean": [114.7748, 107.7354, 99.475], "std": [38.7568578, 37.88248729, 40.02898126]} \ No newline at end of file +{"root_path": "/usr/home/kop/", "video_path": "/data2/EgoGesture/images", "annotation_path": "/usr/home/kop/Real-time-GesRec/annotation_EgoGesture/egogesturebinary.json", "result_path": "/usr/home/kop/Real-time-GesRec/results", "store_name": "model", "modality": "Depth", "dataset": "egogesture", "n_classes": 2, "n_finetune_classes": 2, "sample_size": 112, "sample_duration": 8, "initial_scale": 1.0, "n_scales": 5, "scale_step": 0.84089641525, "train_crop": "random", "learning_rate": 0.01, "lr_steps": [10, 25, 50, 80, 100], "momentum": 0.9, "dampening": 0.9, "weight_decay": 0.001, "mean_dataset": "activitynet", "no_mean_norm": false, "std_norm": false, "nesterov": false, "optimizer": "sgd", "lr_patience": 10, "batch_size": 8, "n_epochs": 100, "begin_epoch": 1, "n_val_samples": 1, "resume_path": "/usr/home/kop/MyRes3D-Ahmet/report/egogesture_resnetl_10_Depth_8_9939.pth", "pretrain_path": "", "ft_begin_index": 0, "no_train": false, "no_val": false, "test": false, "test_subset": "test", "train_validate": false, "scale_in_test": 1.0, "crop_position_in_test": "c", "no_softmax_in_test": false, "no_cuda": false, "n_threads": 16, "checkpoint": 1, "no_hflip": false, "norm_value": 1, "model": "resnetl", "model_depth": 10, "resnet_shortcut": "A", "wide_resnet_k": 2, "resnext_cardinality": 32, "manual_seed": 1, "weighted": false, "scales": [1.0, 0.84089641525, 0.7071067811803005, 0.5946035574934808, 0.4999999999911653], "arch": "resnetl-10", "mean": [114.7748, 107.7354, 99.475], "std": [38.7568578, 37.88248729, 40.02898126]} \ No newline at end of file diff --git a/run_offline.sh b/run_offline.sh index fe9a7f7..03ed2b6 100644 --- a/run_offline.sh +++ b/run_offline.sh 
@@ -1,23 +1,24 @@ #!/bin/bash -python main.py \ +python offline_test.py \ --root_path ~/ \ --video_path /data2/EgoGesture/images \ - --annotation_path ~/Real-time-GesRec/annotation_EgoGesture/egogestureall_but_None.json\ + --annotation_path ~/Real-time-GesRec/annotation_EgoGesture/egogesturebinary.json\ --result_path ~/Real-time-GesRec/results \ + --resume_path MyRes3D-Ahmet/report/egogesture_resnetl_10_Depth_8_9939.pth \ --dataset egogesture \ - --sample_duration 32 \ - --learning_rate 0.01 \ - --model c3d \ + --sample_duration 8 \ + --learning_rate 0.01 \ + --model resnetl \ --model_depth 10 \ - --resnet_shortcut B \ + --resnet_shortcut A \ --batch_size 8 \ - --n_classes 83 \ - --n_finetune_classes 83 \ + --n_classes 2 \ + --n_finetune_classes 2 \ --n_threads 16 \ --checkpoint 1 \ - --modality RGB-D \ + --modality Depth \ --train_crop random \ --n_val_samples 1 \ --test_subset test \ - --n_epochs 100 \ + --n_epochs 100 \ diff --git a/run_online.sh b/run_online.sh index 8eb4e23..59fcb56 100644 --- a/run_online.sh +++ b/run_online.sh @@ -1,9 +1,8 @@ #!/bin/bash -python real_time_test.py \ +python online_test.py \ --root_path ~/\ --video_path /data2/EgoGesture/images \ - --whole_path Subject09/Scene6/Color/rgb1 \ - --annotation_path MyRes3D-Ahmet/annotation_EgoGesture/egogestureall.json \ + --annotation_path Real-time-GesRec/annotation_EgoGesture/egogestureall.json \ --resume_path_det MyRes3D-Ahmet/report/egogesture_resnetl_10_Depth_8_9939.pth \ --resume_path_clf MyRes3D-Ahmet/report/egogesture_resnext_101_Depth_32_9403.pth \ --result_path MyRes3D-Ahmet/results \ @@ -22,16 +21,17 @@ python real_time_test.py \ --n_classes_clf 83 \ --n_finetune_classes_clf 83 \ --n_threads 16 \ + --checkpoint 1 \ --modality_det Depth \ --modality_clf Depth \ + --n_val_samples 1 \ --train_crop random \ --test_subset test \ - --det_strategy raw \ + --det_strategy median \ --det_queue_size 4 \ - --det_threshold 1 \ - --clf_strategy raw \ - --clf_queue_size 2 \ - --clf_threshold_pre 2 \ - --clf_threshold_final 1 \ - --stride_len 1 \ - + --det_counter 2 \ + --clf_strategy median \ + --clf_queue_size 16 \ + --clf_threshold_pre 0.6 \ + --clf_threshold_final 0.15 \ + --stride_len 1 \ \ No newline at end of file diff --git a/utils.py b/utils.py index af1bfa0..be9ca77 100644 --- a/utils.py +++ b/utils.py @@ -1,7 +1,7 @@ import csv import pdb from sklearn.metrics import accuracy_score, precision_score, recall_score, classification_report, confusion_matrix - +import numpy as np class AverageMeter(object): @@ -44,6 +44,78 @@ def log(self, values): self.logger.writerow(write_values) self.log_file.flush() +class Queue: + #Constructor creates a list + def __init__(self, max_size, n_classes): + self.queue = list(np.zeros((max_size, n_classes),dtype = float).tolist()) + self.max_size = max_size + self.median = None + self.ma = None + self.ewma = None + #Adding elements to queue + def enqueue(self,data): + self.queue.insert(0,data) + self.median = self._median() + self.ma = self._ma() + self.ewma = self._ewma() + return True + + #Removing the last element from the queue + def dequeue(self): + if len(self.queue)>0: + return self.queue.pop() + return ("Queue Empty!") + + #Getting the size of the queue + def size(self): + return len(self.queue) + + #printing the elements of the queue + def printQueue(self): + return self.queue + + #Average + def _ma(self): + return np.array(self.queue[:self.max_size]).mean(axis = 0) + + #Median + def _median(self): + return np.median(np.array(self.queue[:self.max_size]), axis = 0) + + #Exponential 
average + def _ewma(self): + weights = np.exp(np.linspace(-1., 0., self.max_size)) + weights /= weights.sum() + average = weights.reshape(1,self.max_size).dot( np.array(self.queue[:self.max_size])) + return average.reshape(average.shape[1],) + +def LevenshteinDistance(a,b): + # This is a straightforward implementation of a well-known algorithm, and thus + # probably shouldn't be covered by copyright to begin with. But in case it is, + # the author (Magnus Lie Hetland) has, to the extent possible under law, + # dedicated all copyright and related and neighboring rights to this software + # to the public domain worldwide, by distributing it under the CC0 license, + # version 1.0. This software is distributed without any warranty. For more + # information, see + "Calculates the Levenshtein distance between a and b." + n, m = len(a), len(b) + if n > m: + # Make sure n <= m, to use O(min(n,m)) space + a,b = b,a + n,m = m,n + + current = range(n+1) + for i in range(1,m+1): + previous, current = current, [i]+[0]*n + for j in range(1,n+1): + add, delete = previous[j]+1, current[j-1]+1 + change = previous[j-1] + if a[j-1] != b[i-1]: + change = change + 1 + current[j] = min(add, delete, change) + + return current[n] + def load_value_file(file_path): with open(file_path, 'r') as input_file:
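
Usage sketch for the utilities added to utils.py above (a minimal example, assuming utils.py from this patch is importable; the probability stream is synthetic and the class sequences are made up). It mirrors how online_test.py consumes the Queue filters and LevenshteinDistance:

import numpy as np
from utils import Queue, LevenshteinDistance

n_classes = 5
clf_queue = Queue(max_size=4, n_classes=n_classes)

# Feed a stream of per-clip softmax outputs; enqueue() refreshes the .median,
# .ma and .ewma attributes, any of which can serve as the filtered score.
for _ in range(10):
    fake_probs = np.random.dirichlet(np.ones(n_classes)).tolist()
    clf_queue.enqueue(fake_probs)
    filtered = clf_queue.median
    prediction = int(np.argmax(filtered))

# Per-video Levenshtein accuracy, as computed at the end of online_test.py.
true_classes = [3, 1, 4, 1, 0]
predicted = [3, 1, 1, 0]
distance = LevenshteinDistance(true_classes, predicted)
accuracy = 1 - distance / len(true_classes)
print('Levenshtein accuracy: {:.3f}'.format(accuracy))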