diff --git a/detector/bbGt.m b/detector/bbGt.m
index 0b2da2a1..a9f7f92c 100644
--- a/detector/bbGt.m
+++ b/detector/bbGt.m
@@ -198,7 +198,20 @@
 % .oRng - [] range of acceptable obj orientations (angles)
 % .xRng - [] range of x coordinates of bb extent
 % .yRng - [] range of y coordinates of bb extent
+% .zRng - [] range of acceptable z point coordinates
 % .vRng - [] range of acceptable obj occlusion levels
+% .name - [] regexp on image source name; matching images are set to ignore
+% .invName - [0] invert behavior of name, setting non-matches to ignore
+% .database - [] regexp on database name; matching images are set to ignore
+% .invData - [0] invert behavior of database, setting non-matches to
+%   ignore
+% .ignDiff - [1] how to treat labels marked as "difficult" in PASCAL VOC
+%   format (format==1). Has no effect in other formats. Valid values
+%   and their effects on difficult labels are
+%   0 - labels are used as normal
+%   1 - labels are set to ignore
+%   -1 - difficult labels are used, and all labels not set as
+%   difficult are set to ignore
 %
 % OUTPUTS
 % objs - loaded objects
@@ -210,11 +223,16 @@
 
 % get parameters
 df={'format',0,'ellipse',1,'squarify',[],'lbls',[],'ilbls',[],'hRng',[],...
-  'wRng',[],'aRng',[],'arRng',[],'oRng',[],'xRng',[],'yRng',[],'vRng',[]};
-[format,ellipse,sqr,lbls,ilbls,hRng,wRng,aRng,arRng,oRng,xRng,yRng,vRng]...
+  'wRng',[],'aRng',[],'arRng',[],'oRng',[],'xRng',[],'yRng',[],...
+  'zRng',[],'vRng',[],...
+  'name',[],'invName',0,'database',[],'invData',0,'ignDiff',1};
+[format,ellipse,sqr,lbls,ilbls,hRng,wRng,aRng,arRng,oRng,xRng,yRng,zRng,vRng,...
+  name,invName,database,invData,ignDiff]...
   = getPrmDflt(varargin,df,1);
 
 % load objs
+nameVal = [];
+dbVal = [];
 if( format==0 )
   % load objs stored in default format
   fId=fopen(fName);
@@ -237,19 +255,41 @@
   % load objs stored in PASCAL VOC format
   if(exist('PASreadrecord.m','file')~=2)
     error('bbLoad() requires the PASCAL VOC code.'); end
-  os=PASreadrecord(fName); os=os.objects;
+  parsed=PASreadrecord(fName);
+  nameVal = parsed.source.image;
+  dbVal = parsed.database;
+  os=parsed.objects;
   n=length(os); objs=create(n);
   if(~isfield(os,'occluded')), for i=1:n, os(i).occluded=0; end; end
+  for i=1:n
+    if(isfield(os(i),'point')) && isfield(os(i).point, 'z')
+      objs(i).z=os(i).point.z;
+    else
+      objs(i).z=nan;
+    end;
+  end
   for i=1:n
     bb=os(i).bbox; bb(3)=bb(3)-bb(1); bb(4)=bb(4)-bb(2); objs(i).bb=bb;
-    objs(i).lbl=os(i).class; objs(i).ign=os(i).difficult;
+    objs(i).lbl=os(i).class;
+    switch ignDiff
+      case 1
+        objs(i).ign=os(i).difficult;
+      case -1
+        objs(i).ign=not(os(i).difficult);
+      case 0
+      otherwise
+        error('Invalid value for ignDiff')
+    end
     objs(i).occ=os(i).occluded || os(i).truncated;
     if(objs(i).occ), objs(i).bbv=bb; end
   end
 elseif( format==2 )
   if(exist('VOCreadxml.m','file')~=2)
     error('bbLoad() requires the ImageNet dev code.'); end
-  os=VOCreadxml(fName); os=os.annotation;
+  parsed=VOCreadxml(fName);
+  nameVal = parsed.annotation.source.image;
+  dbVal = parsed.annotation.source.database;
+  os=parsed.annotation;
   if(isfield(os,'object')), os=os.object; else os=[]; end
   n=length(os); objs=create(n);
   for i=1:n
@@ -260,6 +300,20 @@
 else error('bbLoad() unknown format: %i',format); end
 
+% Filter on image source and database name
+filteredAll = false;
+if ~isempty(nameVal) && ~isempty(name)
+  match = regexp(nameVal, name, 'once');
+  filteredAll = filteredAll | xor(numel(match), invName);
+end
+if ~isempty(dbVal) && ~isempty(database)
+  match = regexp(dbVal, database, 'once');
+  filteredAll = filteredAll | xor(numel(match), invData);
+end
+if filteredAll
+  for i=1:n, objs(i).ign = 1; end
+end
+
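For illustration, a minimal usage sketch of the new bbLoad filters documented above, assuming the toolbox's usual objs=bbGt('bbLoad',fName,pLoad) calling convention; the annotation path and regexp value below are hypothetical placeholders.

% Load a PASCAL VOC annotation; objects whose z point coordinate falls
% outside [0.5 2] are set to ignore, "difficult" labels are set to ignore
% (the default), and if the image source name does NOT match 'flickr' every
% object in the file is set to ignore (invName=1 inverts the name filter).
pLoad = { 'format',1, 'zRng',[.5 2], 'ignDiff',1, 'name','flickr', 'invName',1 };
objs  = bbGt( 'bbLoad', 'Annotations/000001.xml', pLoad );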
% only keep objects whose lbl is in lbls or ilbls if(~isempty(lbls) || ~isempty(ilbls)), K=true(n,1); for i=1:n, K(i)=any(strcmp(objs(i).lbl,[lbls ilbls])); end @@ -293,6 +347,8 @@ v=(bbv(3)*bbv(4))/(bb(3)*bb(4)); end objs(i).ign = objs(i).ign || vvRng(2); end end +if(~isempty(zRng)), for i=1:n, v=objs(i).z; % Handle NaN entries + objs(i).ign = objs(i).ign || ~(v>zRng(1) && v='A'&lbl<='Z'); + t=t(t>=4); + if ~isempty(t) + record.objects(obj).view=lbl(t(1):end); + lbl(t(1):end)=[]; + else + record.objects(obj).view=''; + end + record.objects(obj).class=lbl(4:end); + + otherwise, %fprintf('Skipping: %s\n',line); + end; + end; + end; + fclose(fd); +return + +function matchnum=match(line,matchstrs) + for i=1:length(matchstrs), + matched(i)=strncmp(line,matchstrs(i).str,matchstrs(i).matchlen); + end; + matchnum=find(matched); + if isempty(matchnum), matchnum=0; end; + if (length(matchnum)~=1), + PASerrmsg('Multiple matches while parsing',''); + end; +return + +function s=initstrings + s(1).matchlen=14; + s(1).str='Image filename : %q'; + + s(2).matchlen=10; + s(2).str='Image size (X x Y x C) : %d x %d x %d'; + + s(3).matchlen=8; + s(3).str='Database : %q'; + + s(4).matchlen=8; + s(4).str='Bounding box for object %d %q (Xmin, Ymin) - (Xmax, Ymax) : (%d, %d) - (%d, %d)'; + + s(5).matchlen=7; + s(5).str='Polygon for object %d %q (X, Y)'; + + s(6).matchlen=5; + s(6).str='Pixel mask for object %d %q : %q'; + + s(7).matchlen=8; + s(7).str='Original label for object %d %q : %q'; + +return \ No newline at end of file diff --git a/external/VOCdevkit/VOCcode/VOCap.m b/external/VOCdevkit/VOCcode/VOCap.m new file mode 100755 index 00000000..c2842400 --- /dev/null +++ b/external/VOCdevkit/VOCcode/VOCap.m @@ -0,0 +1,10 @@ +function ap = VOCap(rec,prec) + +mrec=[0 ; rec ; 1]; +mpre=[0 ; prec ; 0]; +for i=numel(mpre)-1:-1:1 + mpre(i)=max(mpre(i),mpre(i+1)); +end +i=find(mrec(2:end)~=mrec(1:end-1))+1; +ap=sum((mrec(i)-mrec(i-1)).*mpre(i)); + diff --git a/external/VOCdevkit/VOCcode/VOCevalaction.m b/external/VOCdevkit/VOCcode/VOCevalaction.m new file mode 100755 index 00000000..4290e5a2 --- /dev/null +++ b/external/VOCdevkit/VOCcode/VOCevalaction.m @@ -0,0 +1,59 @@ +function [rec,prec,ap] = VOCevalaction(VOCopts,id,cls,draw) + +% load test set +[gtimg,gtobj,gt]=textread(sprintf(VOCopts.action.clsimgsetpath,cls,VOCopts.testset),'%s %d %d'); + +% hash image/object ids +gtid=cell(numel(gtimg),1); +for i=1:numel(gtimg); + gtid{i}=sprintf('%s/%d',gtimg{i},gtobj(i)); +end +hash=VOChash_init(gtid); + +% load results +[img,obj,confidence]=textread(sprintf(VOCopts.action.respath,id,cls),'%s %d %f'); + +% map results to ground truth objects +out=ones(size(gt))*-inf; +tic; +for i=1:length(img) + % display progress + if toc>1 + fprintf('%s: pr: %d/%d\n',cls,i,length(img)); + drawnow; + tic; + end + + % find ground truth object + k=sprintf('%s/%d',img{i},obj(i)); + j=VOChash_lookup(hash,k); + if isempty(j) + error('unrecognized object "%s"',k); + elseif length(j)>1 + error('multiple image "%s"',k); + else + out(j)=confidence(i); + end +end + +% compute precision/recall + +[so,si]=sort(-out); +tp=gt(si)>0; +fp=gt(si)<0; + +fp=cumsum(fp); +tp=cumsum(tp); +rec=tp/sum(gt>0); +prec=tp./(fp+tp); + +ap=VOCap(rec,prec); + +if draw + % plot precision/recall + plot(rec,prec,'-'); + grid; + xlabel 'recall' + ylabel 'precision' + title(sprintf('class: %s, subset: %s, AP = %.3f',cls,VOCopts.testset,ap)); +end diff --git a/external/VOCdevkit/VOCcode/VOCevalcls.m b/external/VOCdevkit/VOCcode/VOCevalcls.m new file mode 100755 index 
00000000..86524eff --- /dev/null +++ b/external/VOCdevkit/VOCcode/VOCevalcls.m @@ -0,0 +1,54 @@ +function [rec,prec,ap] = VOCevalcls(VOCopts,id,cls,draw) + +% load test set +[gtids,gt]=textread(sprintf(VOCopts.clsimgsetpath,cls,VOCopts.testset),'%s %d'); + +% hash image ids +hash=VOChash_init(gtids); + +% load results +[ids,confidence]=textread(sprintf(VOCopts.clsrespath,id,cls),'%s %f'); + +% map results to ground truth images +out=ones(size(gt))*-inf; +tic; +for i=1:length(ids) + % display progress + if toc>1 + fprintf('%s: pr: %d/%d\n',cls,i,length(ids)); + drawnow; + tic; + end + + % find ground truth image + j=VOChash_lookup(hash,ids{i}); + if isempty(j) + error('unrecognized image "%s"',ids{i}); + elseif length(j)>1 + error('multiple image "%s"',ids{i}); + else + out(j)=confidence(i); + end +end + +% compute precision/recall + +[so,si]=sort(-out); +tp=gt(si)>0; +fp=gt(si)<0; + +fp=cumsum(fp); +tp=cumsum(tp); +rec=tp/sum(gt>0); +prec=tp./(fp+tp); + +ap=VOCap(rec,prec); + +if draw + % plot precision/recall + plot(rec,prec,'-'); + grid; + xlabel 'recall' + ylabel 'precision' + title(sprintf('class: %s, subset: %s, AP = %.3f',cls,VOCopts.testset,ap)); +end diff --git a/external/VOCdevkit/VOCcode/VOCevaldet.m b/external/VOCdevkit/VOCcode/VOCevaldet.m new file mode 100755 index 00000000..f28a7d44 --- /dev/null +++ b/external/VOCdevkit/VOCcode/VOCevaldet.m @@ -0,0 +1,123 @@ +function [rec,prec,ap] = VOCevaldet(VOCopts,id,cls,draw) + +% load test set + +cp=sprintf(VOCopts.annocachepath,VOCopts.testset); +if exist(cp,'file') + fprintf('%s: pr: loading ground truth\n',cls); + load(cp,'gtids','recs'); +else + [gtids,t]=textread(sprintf(VOCopts.imgsetpath,VOCopts.testset),'%s %d'); + for i=1:length(gtids) + % display progress + if toc>1 + fprintf('%s: pr: load: %d/%d\n',cls,i,length(gtids)); + drawnow; + tic; + end + + % read annotation + recs(i)=PASreadrecord(sprintf(VOCopts.annopath,gtids{i})); + end + save(cp,'gtids','recs'); +end + +fprintf('%s: pr: evaluating detections\n',cls); + +% hash image ids +hash=VOChash_init(gtids); + +% extract ground truth objects + +npos=0; +gt(length(gtids))=struct('BB',[],'diff',[],'det',[]); +for i=1:length(gtids) + % extract objects of class + clsinds=strmatch(cls,{recs(i).objects(:).class},'exact'); + gt(i).BB=cat(1,recs(i).objects(clsinds).bbox)'; + gt(i).diff=[recs(i).objects(clsinds).difficult]; + gt(i).det=false(length(clsinds),1); + npos=npos+sum(~gt(i).diff); +end + +% load results +[ids,confidence,b1,b2,b3,b4]=textread(sprintf(VOCopts.detrespath,id,cls),'%s %f %f %f %f %f'); +BB=[b1 b2 b3 b4]'; + +% sort detections by decreasing confidence +[sc,si]=sort(-confidence); +ids=ids(si); +BB=BB(:,si); + +% assign detections to ground truth objects +nd=length(confidence); +tp=zeros(nd,1); +fp=zeros(nd,1); +tic; +for d=1:nd + % display progress + if toc>1 + fprintf('%s: pr: compute: %d/%d\n',cls,d,nd); + drawnow; + tic; + end + + % find ground truth image + i=VOChash_lookup(hash,ids{d}); + if isempty(i) + error('unrecognized image "%s"',ids{d}); + elseif length(i)>1 + error('multiple image "%s"',ids{d}); + end + + % assign detection to ground truth object if any + bb=BB(:,d); + ovmax=-inf; + for j=1:size(gt(i).BB,2) + bbgt=gt(i).BB(:,j); + bi=[max(bb(1),bbgt(1)) ; max(bb(2),bbgt(2)) ; min(bb(3),bbgt(3)) ; min(bb(4),bbgt(4))]; + iw=bi(3)-bi(1)+1; + ih=bi(4)-bi(2)+1; + if iw>0 & ih>0 + % compute overlap as area of intersection / area of union + ua=(bb(3)-bb(1)+1)*(bb(4)-bb(2)+1)+... + (bbgt(3)-bbgt(1)+1)*(bbgt(4)-bbgt(2)+1)-... 
+ iw*ih; + ov=iw*ih/ua; + if ov>ovmax + ovmax=ov; + jmax=j; + end + end + end + % assign detection as true positive/don't care/false positive + if ovmax>=VOCopts.minoverlap + if ~gt(i).diff(jmax) + if ~gt(i).det(jmax) + tp(d)=1; % true positive + gt(i).det(jmax)=true; + else + fp(d)=1; % false positive (multiple detection) + end + end + else + fp(d)=1; % false positive + end +end + +% compute precision/recall +fp=cumsum(fp); +tp=cumsum(tp); +rec=tp/npos; +prec=tp./(fp+tp); + +ap=VOCap(rec,prec); + +if draw + % plot precision/recall + plot(rec,prec,'-'); + grid; + xlabel 'recall' + ylabel 'precision' + title(sprintf('class: %s, subset: %s, AP = %.3f',cls,VOCopts.testset,ap)); +end diff --git a/external/VOCdevkit/VOCcode/VOCevallayout_pr.m b/external/VOCdevkit/VOCcode/VOCevallayout_pr.m new file mode 100755 index 00000000..950e128d --- /dev/null +++ b/external/VOCdevkit/VOCcode/VOCevallayout_pr.m @@ -0,0 +1,145 @@ +function [rec,prec,ap] = VOCevallayout_pr(VOCopts,id,draw) + +% load test set +[imgids,objids]=textread(sprintf(VOCopts.layout.imgsetpath,VOCopts.testset),'%s %d'); + +% hash image ids +hash=VOChash_init(imgids); + +% load ground truth objects + +tic; +n=0; +np=zeros(VOCopts.nparts,1); +for i=1:length(imgids) + % display progress + if toc>1 + fprintf('layout pr: load %d/%d\n',i,length(imgids)); + drawnow; + tic; + end + + % read annotation + r=PASreadrecord(sprintf(VOCopts.annopath,imgids{i})); + + % extract object + n=n+1; + o=r.objects(objids(i)); + gt(n)=o; + + for j=1:numel(o.part) + c=strmatch(o.part(j).class,VOCopts.parts,'exact'); + np(c)=np(c)+1; + end +end + +% load results + +fprintf('layout pr: loading results\n'); +xml=VOCreadxml(sprintf(VOCopts.layout.respath,id)); + +% test detections by decreasing confidence + +[t,si]=sort(-str2double({xml.results.layout.confidence})); +nd=numel(si); + +det=false(n,1); + +ptp=[]; +pfp=[]; +pc=[]; + +for di=1:nd + + % display progress + if toc>1 + fprintf('layout pr: compute: %d/%d\n',di,nd); + drawnow; + tic; + end + + % match result to ground truth object + d=xml.results.layout(si(di)); + ii=VOChash_lookup(hash,d.image); + oi=ii(objids(ii)==str2num(d.object)); + + if isempty(oi) + warning('unrecognized layout: image %s, object %s',d.image,d.object); + continue + end + + if det(oi) + warning('duplicate layout: image %s, object %s',d.image,d.object); + continue + end + det(oi)=true; + o=gt(oi); + + % assign parts to ground truth parts + + gtd=false(numel(o.part),1); + da=zeros(numel(d.part),1); + dc=zeros(numel(d.part),1); + for i=1:numel(d.part) + dc(i)=strmatch(d.part(i).class,VOCopts.parts,'exact'); + bb=str2double({d.part(i).bndbox.xmin d.part(i).bndbox.ymin ... + d.part(i).bndbox.xmax d.part(i).bndbox.ymax}); + + ovmax=-inf; + for j=1:numel(o.part) + if strcmp(d.part(i).class,o.part(j).class) + bbgt=o.part(j).bbox; + bi=[max(bb(1),bbgt(1)) + max(bb(2),bbgt(2)) + min(bb(3),bbgt(3)) + min(bb(4),bbgt(4))]; + iw=bi(3)-bi(1)+1; + ih=bi(4)-bi(2)+1; + if iw>0 & ih>0 + % compute overlap as area of intersection / area of union + ua=(bb(3)-bb(1)+1)*(bb(4)-bb(2)+1)+... + (bbgt(3)-bbgt(1)+1)*(bbgt(4)-bbgt(2)+1)-... 
+ iw*ih; + ov=iw*ih/ua; + if ov>ovmax + ovmax=ov; + jmax=j; + end + end + end + end + if ovmax>=VOCopts.minoverlap && ~gtd(jmax) + da(i)=jmax; + gtd(jmax)=true; + end + end + + ptp=[ptp ; da~=0]; + pfp=[pfp ; da==0]; + pc=[pc ; dc]; +end + +% evaluate each part type + +for i=1:VOCopts.nparts + + % compute precision/recall + + fpi=cumsum(pfp(pc==i)); + tpi=cumsum(ptp(pc==i)); + v=tpi+fpi>0; + rec{i}=tpi(v)/np(i); + prec{i}=tpi(v)./(fpi(v)+tpi(v)); + + ap{i}=VOCap(rec{i},prec{i}); + + if draw + % plot precision/recall + subplot(VOCopts.nparts,1,i); + plot(rec{i},prec{i},'-'); + grid; + xlabel 'recall' + ylabel 'precision' + title(sprintf('subset: %s, part: %s, AP = %.3f',VOCopts.testset,VOCopts.parts{i},ap{i})); + end +end diff --git a/external/VOCdevkit/VOCcode/VOCevalseg.m b/external/VOCdevkit/VOCcode/VOCevalseg.m new file mode 100755 index 00000000..6fb7eae2 --- /dev/null +++ b/external/VOCdevkit/VOCcode/VOCevalseg.m @@ -0,0 +1,92 @@ +%VOCEVALSEG Evaluates a set of segmentation results. +% VOCEVALSEG(VOCopts,ID); prints out the per class and overall +% segmentation accuracies. Accuracies are given using the intersection/union +% metric: +% true positives / (true positives + false positives + false negatives) +% +% [ACCURACIES,AVACC,CONF] = VOCEVALSEG(VOCopts,ID) returns the per class +% percentage ACCURACIES, the average accuracy AVACC and the confusion +% matrix CONF. +% +% [ACCURACIES,AVACC,CONF,RAWCOUNTS] = VOCEVALSEG(VOCopts,ID) also returns +% the unnormalised confusion matrix, which contains raw pixel counts. +function [accuracies,avacc,conf,rawcounts] = VOCevalseg(VOCopts,id) + +% image test set +[gtids,t]=textread(sprintf(VOCopts.seg.imgsetpath,VOCopts.testset),'%s %d'); + +% number of labels = number of classes plus one for the background +num = VOCopts.nclasses+1; +confcounts = zeros(num); +count=0; +tic; +for i=1:length(gtids) + % display progress + if toc>1 + fprintf('test confusion: %d/%d\n',i,length(gtids)); + drawnow; + tic; + end + + imname = gtids{i}; + + % ground truth label file + gtfile = sprintf(VOCopts.seg.clsimgpath,imname); + [gtim,map] = imread(gtfile); + gtim = double(gtim); + + % results file + resfile = sprintf(VOCopts.seg.clsrespath,id,VOCopts.testset,imname); + [resim,map] = imread(resfile); + resim = double(resim); + + % Check validity of results image + maxlabel = max(resim(:)); + if (maxlabel>VOCopts.nclasses), + error('Results image ''%s'' has out of range value %d (the value should be <= %d)',imname,maxlabel,VOCopts.nclasses); + end + + szgtim = size(gtim); szresim = size(resim); + if any(szgtim~=szresim) + error('Results image ''%s'' is the wrong size, was %d x %d, should be %d x %d.',imname,szresim(1),szresim(2),szgtim(1),szgtim(2)); + end + + %pixel locations to include in computation + locs = gtim<255; + + % joint histogram + sumim = 1+gtim+resim*num; + hs = histc(sumim(locs),1:num*num); + count = count + numel(find(locs)); + confcounts(:) = confcounts(:) + hs(:); +end + +% confusion matrix - first index is true label, second is inferred label +%conf = zeros(num); +conf = 100*confcounts./repmat(1E-20+sum(confcounts,2),[1 size(confcounts,2)]); +rawcounts = confcounts; + +% Percentage correct labels measure is no longer being used. 
Uncomment if +% you wish to see it anyway +%overall_acc = 100*sum(diag(confcounts)) / sum(confcounts(:)); +%fprintf('Percentage of pixels correctly labelled overall: %6.3f%%\n',overall_acc); + +accuracies = zeros(VOCopts.nclasses,1); +fprintf('Accuracy for each class (intersection/union measure)\n'); +for j=1:num + + gtj=sum(confcounts(j,:)); + resj=sum(confcounts(:,j)); + gtjresj=confcounts(j,j); + % The accuracy is: true positive / (true positive + false positive + false negative) + % which is equivalent to the following percentage: + accuracies(j)=100*gtjresj/(gtj+resj-gtjresj); + + clname = 'background'; + if (j>1), clname = VOCopts.classes{j-1};end; + fprintf(' %14s: %6.3f%%\n',clname,accuracies(j)); +end +accuracies = accuracies(1:end); +avacc = mean(accuracies); +fprintf('-------------------------\n'); +fprintf('Average accuracy: %6.3f%%\n',avacc); diff --git a/external/VOCdevkit/VOCcode/VOChash_init.m b/external/VOCdevkit/VOCcode/VOChash_init.m new file mode 100755 index 00000000..4c2ee477 --- /dev/null +++ b/external/VOCdevkit/VOCcode/VOChash_init.m @@ -0,0 +1,14 @@ +function hash = VOChash_init(strs) + +hsize=4999; +hash.key=cell(hsize,1); +hash.val=cell(hsize,1); + +for i=1:numel(strs) + s=strs{i}; + h=mod(str2double(s([3:4 6:11 13:end])),hsize)+1; + j=numel(hash.key{h})+1; + hash.key{h}{j}=strs{i}; + hash.val{h}(j)=i; +end + diff --git a/external/VOCdevkit/VOCcode/VOChash_lookup.m b/external/VOCdevkit/VOCcode/VOChash_lookup.m new file mode 100755 index 00000000..c2ea9c15 --- /dev/null +++ b/external/VOCdevkit/VOCcode/VOChash_lookup.m @@ -0,0 +1,5 @@ +function ind = VOChash_lookup(hash,s) + +hsize=numel(hash.key); +h=mod(str2double(s([3:4 6:11 13:end])),hsize)+1; +ind=hash.val{h}(strmatch(s,hash.key{h},'exact')); diff --git a/external/VOCdevkit/VOCcode/VOCinit.m b/external/VOCdevkit/VOCcode/VOCinit.m new file mode 100755 index 00000000..0eedb2e5 --- /dev/null +++ b/external/VOCdevkit/VOCcode/VOCinit.m @@ -0,0 +1,142 @@ +clear VOCopts + +% dataset +% +% Note for experienced users: the VOC2008-11 test sets are subsets +% of the VOC2012 test set. You don't need to do anything special +% to submit results for VOC2008-11. 
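To make the intersection/union accuracy used by VOCevalseg above concrete, here is a small hand-worked sketch with toy pixel counts (not devkit output):

% Per-class accuracy from the confusion counts, as computed in VOCevalseg:
% accuracy = TP / (TP + FP + FN).
gtj     = 100;  % pixels of class j in the ground truth (row sum,    TP+FN)
resj    = 80;   % pixels labelled j in the result       (column sum, TP+FP)
gtjresj = 60;   % pixels labelled j in both             (diagonal,   TP)
acc = 100*gtjresj/(gtj+resj-gtjresj);   % = 100*60/120 = 50%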
+ +VOCopts.dataset='VOC2012'; + +% get devkit directory with forward slashes +devkitroot=strrep(fileparts(fileparts(mfilename('fullpath'))),'\','/'); + +% change this path to point to your copy of the PASCAL VOC data +VOCopts.datadir=[devkitroot '/']; + +% change this path to a writable directory for your results +VOCopts.resdir=[devkitroot '/results/' VOCopts.dataset '/']; + +% change this path to a writable local directory for the example code +VOCopts.localdir=[devkitroot '/local/' VOCopts.dataset '/']; + +% initialize the training set + +VOCopts.trainset='train'; % use train for development +% VOCopts.trainset='trainval'; % use train+val for final challenge + +% initialize the test set + +VOCopts.testset='val'; % use validation data for development test set +% VOCopts.testset='test'; % use test set for final challenge + +% initialize main challenge paths + +VOCopts.annopath=[VOCopts.datadir VOCopts.dataset '/Annotations/%s.xml']; +VOCopts.imgpath=[VOCopts.datadir VOCopts.dataset '/JPEGImages/%s.jpg']; +VOCopts.imgsetpath=[VOCopts.datadir VOCopts.dataset '/ImageSets/Main/%s.txt']; +VOCopts.clsimgsetpath=[VOCopts.datadir VOCopts.dataset '/ImageSets/Main/%s_%s.txt']; +VOCopts.clsrespath=[VOCopts.resdir 'Main/%s_cls_' VOCopts.testset '_%s.txt']; +VOCopts.detrespath=[VOCopts.resdir 'Main/%s_det_' VOCopts.testset '_%s.txt']; + +% initialize segmentation task paths + +VOCopts.seg.clsimgpath=[VOCopts.datadir VOCopts.dataset '/SegmentationClass/%s.png']; +VOCopts.seg.instimgpath=[VOCopts.datadir VOCopts.dataset '/SegmentationObject/%s.png']; + +VOCopts.seg.imgsetpath=[VOCopts.datadir VOCopts.dataset '/ImageSets/Segmentation/%s.txt']; + +VOCopts.seg.clsresdir=[VOCopts.resdir 'Segmentation/%s_%s_cls']; +VOCopts.seg.instresdir=[VOCopts.resdir 'Segmentation/%s_%s_inst']; +VOCopts.seg.clsrespath=[VOCopts.seg.clsresdir '/%s.png']; +VOCopts.seg.instrespath=[VOCopts.seg.instresdir '/%s.png']; + +% initialize layout task paths + +VOCopts.layout.imgsetpath=[VOCopts.datadir VOCopts.dataset '/ImageSets/Layout/%s.txt']; +VOCopts.layout.respath=[VOCopts.resdir 'Layout/%s_layout_' VOCopts.testset '.xml']; + +% initialize action task paths + +VOCopts.action.imgsetpath=[VOCopts.datadir VOCopts.dataset '/ImageSets/Action/%s.txt']; +VOCopts.action.clsimgsetpath=[VOCopts.datadir VOCopts.dataset '/ImageSets/Action/%s_%s.txt']; +VOCopts.action.respath=[VOCopts.resdir 'Action/%s_action_' VOCopts.testset '_%s.txt']; + +% initialize the VOC challenge options + +% classes + +VOCopts.classes={... + 'aeroplane' + 'bicycle' + 'bird' + 'boat' + 'bottle' + 'bus' + 'car' + 'cat' + 'chair' + 'cow' + 'diningtable' + 'dog' + 'horse' + 'motorbike' + 'person' + 'pottedplant' + 'sheep' + 'sofa' + 'train' + 'tvmonitor'}; + +VOCopts.nclasses=length(VOCopts.classes); + +% poses + +VOCopts.poses={... + 'Unspecified' + 'Left' + 'Right' + 'Frontal' + 'Rear'}; + +VOCopts.nposes=length(VOCopts.poses); + +% layout parts + +VOCopts.parts={... + 'head' + 'hand' + 'foot'}; + +VOCopts.nparts=length(VOCopts.parts); + +VOCopts.maxparts=[1 2 2]; % max of each of above parts + +% actions + +VOCopts.actions={... 
+ 'other' % skip this when training classifiers + 'jumping' + 'phoning' + 'playinginstrument' + 'reading' + 'ridingbike' + 'ridinghorse' + 'running' + 'takingphoto' + 'usingcomputer' + 'walking'}; + +VOCopts.nactions=length(VOCopts.actions); + +% overlap threshold + +VOCopts.minoverlap=0.5; + +% annotation cache for evaluation + +VOCopts.annocachepath=[VOCopts.localdir '%s_anno.mat']; + +% options for example implementations + +VOCopts.exfdpath=[VOCopts.localdir '%s_fd.mat']; diff --git a/external/VOCdevkit/VOCcode/VOClabelcolormap.m b/external/VOCdevkit/VOCcode/VOClabelcolormap.m new file mode 100755 index 00000000..7ef8c84f --- /dev/null +++ b/external/VOCdevkit/VOCcode/VOClabelcolormap.m @@ -0,0 +1,22 @@ +% VOCLABELCOLORMAP Creates a label color map such that adjacent indices have different +% colors. Useful for reading and writing index images which contain large indices, +% by encoding them as RGB images. +% +% CMAP = VOCLABELCOLORMAP(N) creates a label color map with N entries. +function cmap = labelcolormap(N) + +if nargin==0 + N=256 +end +cmap = zeros(N,3); +for i=1:N + id = i-1; r=0;g=0;b=0; + for j=0:7 + r = bitor(r, bitshift(bitget(id,1),7 - j)); + g = bitor(g, bitshift(bitget(id,2),7 - j)); + b = bitor(b, bitshift(bitget(id,3),7 - j)); + id = bitshift(id,-3); + end + cmap(i,1)=r; cmap(i,2)=g; cmap(i,3)=b; +end +cmap = cmap / 255; diff --git a/external/VOCdevkit/VOCcode/VOCreadrecxml.m b/external/VOCdevkit/VOCcode/VOCreadrecxml.m new file mode 100755 index 00000000..aaf6ae5e --- /dev/null +++ b/external/VOCdevkit/VOCcode/VOCreadrecxml.m @@ -0,0 +1,122 @@ +function rec = VOCreadrecxml(path) + +x=VOCreadxml(path); +x=x.annotation; + +rec.folder=x.folder; +rec.filename=x.filename; +rec.source.database=x.source.database; +rec.source.annotation=x.source.annotation; +rec.source.image=x.source.image; + +rec.size.width=str2double(x.size.width); +rec.size.height=str2double(x.size.height); +rec.size.depth=str2double(x.size.depth); + +rec.segmented=strcmp(x.segmented,'1'); + +rec.imgname=[x.folder '/JPEGImages/' x.filename]; +rec.imgsize=str2double({x.size.width x.size.height x.size.depth}); +rec.database=rec.source.database; + +for i=1:length(x.object) + rec.objects(i)=xmlobjtopas(x.object(i)); +end + +function p = xmlobjtopas(o) + +p.class=o.name; + +if isfield(o,'pose') + if strcmp(o.pose,'Unspecified') + p.view=''; + else + p.view=o.pose; + end +else + p.view=''; +end + +if isfield(o,'truncated') + p.truncated=strcmp(o.truncated,'1'); +else + p.truncated=false; +end + +if isfield(o,'occluded') + p.occluded=strcmp(o.occluded,'1'); +else + p.occluded=false; +end + +if isfield(o,'difficult') + p.difficult=strcmp(o.difficult,'1'); +else + p.difficult=false; +end + +p.label=['PAS' p.class p.view]; +if p.truncated + p.label=[p.label 'Trunc']; +end +if p.occluded + p.label=[p.label 'Occ']; +end +if p.difficult + p.label=[p.label 'Diff']; +end + +p.orglabel=p.label; + +p.bbox=str2double({o.bndbox.xmin o.bndbox.ymin o.bndbox.xmax o.bndbox.ymax}); + +p.bndbox.xmin=str2double(o.bndbox.xmin); +p.bndbox.ymin=str2double(o.bndbox.ymin); +p.bndbox.xmax=str2double(o.bndbox.xmax); +p.bndbox.ymax=str2double(o.bndbox.ymax); + +if isfield(o,'polygon') + warning('polygon unimplemented'); + p.polygon=[]; +else + p.polygon=[]; +end + +if isfield(o,'mask') + warning('mask unimplemented'); + p.mask=[]; +else + p.mask=[]; +end + +if isfield(o,'part')&&~isempty(o.part) + p.hasparts=true; + for i=1:length(o.part) + p.part(i)=xmlobjtopas(o.part(i)); + end +else + p.hasparts=false; + p.part=[]; +end + +if 
isfield(o,'point') + p.haspoint=true; + p.point.x=str2double(o.point.x); + p.point.y=str2double(o.point.y); + if isfield(o.point,'z') + p.point.z=str2double(o.point.z); + end +else + p.point=[]; +end + +if isfield(o,'actions') + p.hasactions=true; + fn=fieldnames(o.actions); + for i=1:numel(fn) + p.actions.(fn{i})=strcmp(o.actions.(fn{i}),'1'); + end +else + p.hasactions=false; + p.actions=[]; +end diff --git a/external/VOCdevkit/VOCcode/VOCreadxml.m b/external/VOCdevkit/VOCcode/VOCreadxml.m new file mode 100755 index 00000000..08abc05c --- /dev/null +++ b/external/VOCdevkit/VOCcode/VOCreadxml.m @@ -0,0 +1,10 @@ +function rec = VOCreadxml(path) + +if length(path)>5&&strcmp(path(1:5),'http:') + xml=urlread(path)'; +else + f=fopen(path,'r'); + xml=fread(f,'*char')'; + fclose(f); +end +rec=VOCxml2struct(xml); diff --git a/external/VOCdevkit/VOCcode/VOCwritexml.m b/external/VOCdevkit/VOCcode/VOCwritexml.m new file mode 100755 index 00000000..6e8f85d8 --- /dev/null +++ b/external/VOCdevkit/VOCcode/VOCwritexml.m @@ -0,0 +1,40 @@ +function VOCwritexml(rec, path) + +fid=fopen(path,'w'); +writexml(fid,rec,0); +fclose(fid); + +function xml = writexml(fid,rec,depth) + +fn=fieldnames(rec); +for i=1:length(fn) + f=rec.(fn{i}); + if ~isempty(f) + if isstruct(f) + for j=1:length(f) + fprintf(fid,'%s',repmat(char(9),1,depth)); + fprintf(fid,'<%s>\n',fn{i}); + writexml(fid,rec.(fn{i})(j),depth+1); + fprintf(fid,'%s',repmat(char(9),1,depth)); + fprintf(fid,'\n',fn{i}); + end + else + if ~iscell(f) + f={f}; + end + for j=1:length(f) + fprintf(fid,'%s',repmat(char(9),1,depth)); + fprintf(fid,'<%s>',fn{i}); + if ischar(f{j}) + fprintf(fid,'%s',f{j}); + elseif isnumeric(f{j})&&numel(f{j})==1 + fprintf(fid,'%s',num2str(f{j})); + else + error('unsupported type'); + end + fprintf(fid,'\n',fn{i}); + end + end + end +end + diff --git a/external/VOCdevkit/VOCcode/VOCxml2struct.m b/external/VOCdevkit/VOCcode/VOCxml2struct.m new file mode 100755 index 00000000..caa96d09 --- /dev/null +++ b/external/VOCdevkit/VOCcode/VOCxml2struct.m @@ -0,0 +1,90 @@ +function res = VOCxml2struct(xml) + +xml(xml==9|xml==10|xml==13)=[]; + +[res,xml]=parse(xml,1,[]); + +function [res,ind]=parse(xml,ind,parent) + +res=[]; +if ~isempty(parent)&&xml(ind)~='<' + i=findchar(xml,ind,'<'); + res=trim(xml(ind:i-1)); + ind=i; + [tag,ind]=gettag(xml,i); + if ~strcmp(tag,['/' parent]) + error('<%s> closed with <%s>',parent,tag); + end +else + while ind<=length(xml) + [tag,ind]=gettag(xml,ind); + if strcmp(tag,['/' parent]) + return + else + [sub,ind]=parse(xml,ind,tag); + if isstruct(sub) + if isfield(res,tag) + n=length(res.(tag)); + fn=fieldnames(sub); + for f=1:length(fn) + res.(tag)(n+1).(fn{f})=sub.(fn{f}); + end + else + res.(tag)=sub; + end + else + if isfield(res,tag) + if ~iscell(res.(tag)) + res.(tag)={res.(tag)}; + end + res.(tag){end+1}=sub; + else + res.(tag)=sub; + end + end + end + end +end + +function i = findchar(str,ind,chr) + +i=[]; +while ind<=length(str) + if str(ind)==chr + i=ind; + break + else + ind=ind+1; + end +end + +function [tag,ind]=gettag(xml,ind) + +if ind>length(xml) + tag=[]; +elseif xml(ind)=='<' + i=findchar(xml,ind,'>'); + if isempty(i) + error('incomplete tag'); + end + tag=xml(ind+1:i-1); + ind=i+1; +else + error('expected tag'); +end + +function s = trim(s) + +for i=1:numel(s) + if ~isspace(s(i)) + s=s(i:end); + break + end +end +for i=numel(s):-1:1 + if ~isspace(s(i)) + s=s(1:i); + break + end +end + diff --git a/external/VOCdevkit/create_segmentations_from_detections.m 
b/external/VOCdevkit/create_segmentations_from_detections.m new file mode 100755 index 00000000..9003600d --- /dev/null +++ b/external/VOCdevkit/create_segmentations_from_detections.m @@ -0,0 +1,108 @@ +% Creates segmentation results from detection results. +% CREATE_SEGMENTATIONS_FROM_DETECTIONS(ID) creates segmentations from +% the detection results with identifier ID e.g. 'comp3'. All detections +% will be used, no matter what their confidence level. Detections are +% ranked by increasing confidence, so more confident detections occlude +% less confident detections. +% +% CREATE_SEGMENTATIONS_FROM_DETECTIONS(ID, CONFIDENCE) as above, but only +% detections above the specified confidence will be used. +function create_segmentations_from_detections(id,confidence) + +if nargin<2 + confidence = -inf; +end + +% change this path if you install the VOC code elsewhere +addpath([cd '/VOCcode']); + +% initialize VOC options +VOCinit; + +% load detection results + +n=0; +for clsnum = 1:VOCopts.nclasses + % display progress + fprintf('class %d/%d: load detections\n',clsnum,VOCopts.nclasses); + drawnow; + + resultsfile = sprintf(VOCopts.detrespath,id,VOCopts.classes{clsnum}); + if ~exist(resultsfile,'file') + error('Could not find detection results file to use to create segmentations (%s not found)',resultsfile); + end + [ids,confs,xmin,ymin,xmax,ymax]=textread(resultsfile,'%s %f %f %f %f %f'); + t=[ids num2cell(ones(numel(ids),1)*clsnum) num2cell(confs) num2cell([xmin ymin xmax ymax],2)]; + dets(n+(1:numel(ids)))=cell2struct(t,{'id' 'clsnum' 'conf' 'bbox'},2); + n=n+numel(ids); +end + +% Write out the segmentations + +segid=sprintf('comp%d',sscanf(id,'comp%d')+2); + +resultsdir = sprintf(VOCopts.seg.clsresdir,segid,VOCopts.testset); +resultsdirinst = sprintf(VOCopts.seg.instresdir,segid,VOCopts.testset); + +if ~exist(resultsdir,'dir') + mkdir(resultsdir); +end + +if ~exist(resultsdirinst,'dir') + mkdir(resultsdirinst); +end + +% load test set + +imgids=textread(sprintf(VOCopts.seg.imgsetpath,VOCopts.testset),'%s'); + +cmap = VOClabelcolormap(255); +detids={dets.id}; +tic; +for j=1:numel(imgids) + % display progress + if toc>1 + fprintf('make segmentation: %d/%d\n',j,numel(imgids)); + drawnow; + tic; + end + imname = imgids{j}; + + classlabelfile = sprintf(VOCopts.seg.clsrespath,segid,VOCopts.testset,imname); + instlabelfile = sprintf(VOCopts.seg.instrespath,segid,VOCopts.testset,imname); + + imgfile = sprintf(VOCopts.imgpath,imname); + imginfo = imfinfo(imgfile); + + vdets=dets(strmatch(imname,detids,'exact')); + + [instim,classim]= convert_dets_to_image(imginfo.Width, imginfo.Height,vdets,confidence); + imwrite(instim,cmap,instlabelfile); + imwrite(classim,cmap,classlabelfile); + +% Copy in ground truth - uncomment to copy ground truth segmentations in +% for comparison +% gtlabelfile = [VOCopts.root '/Segmentations(class)/' imname '.png']; +% gtclasslabelfile = sprintf('%s/%d_gt.png',resultsdir,imnums(j)); +% copyfile(gtlabelfile,gtclasslabelfile); +end + +% Converts a set of detected bounding boxes into an instance-labelled image +% and a class-labelled image +function [instim,classim]=convert_dets_to_image(W,H,dets,confidence) + +instim = uint8(zeros([H W])); +classim = uint8(zeros([H W])); +[sc,si]=sort([dets.conf]); +si(sc1 + fprintf('%s: train: %d/%d\n',cls,i,length(imgids)); + drawnow; + tic; + end + + rec=PASreadrecord(sprintf(VOCopts.annopath,imgids{i})); + obj=rec.objects(objids(i)); + + fd=extractfd(VOCopts,obj); + classifier.FD(1:length(fd),i)=fd; +end + +% run classifier on test 
images +function test(VOCopts,cls,classifier) + +% load test set ('val' for development kit) +[imgids,objids,gt]=textread(sprintf(VOCopts.action.clsimgsetpath,cls,VOCopts.testset),'%s %d %d'); + +% create results file +fid=fopen(sprintf(VOCopts.action.respath,'comp9',cls),'w'); + +% classify each person +tic; +for i=1:length(imgids) + % display progress + if toc>1 + fprintf('%s: test: %d/%d\n',cls,i,length(imgids)); + drawnow; + tic; + end + + rec=PASreadrecord(sprintf(VOCopts.annopath,imgids{i})); + obj=rec.objects(objids(i)); + + fd=extractfd(VOCopts,obj); + + % compute confidence of positive classification + c=classify(VOCopts,classifier,fd); + + % write to results file + fprintf(fid,'%s %d %f\n',imgids{i},objids(i),c); +end + +% close results file +fclose(fid); + +% trivial feature extractor: bounding box aspect ratio +function fd = extractfd(VOCopts,obj) + +w=obj.bndbox.xmax-obj.bndbox.xmin+1; +h=obj.bndbox.ymax-obj.bndbox.ymin+1; +fd=w/h; + +% trivial classifier: compute ratio of L2 distance betweeen +% nearest positive (class) feature vector and nearest negative (non-class) +% feature vector +function c = classify(VOCopts,classifier,fd) + +d=sum(fd.*fd,1)+sum(classifier.FD.*classifier.FD,1)-2*fd'*classifier.FD; +dp=min(d(classifier.gt>0)); +dn=min(d(classifier.gt<0)); +c=dn/(dp+eps); diff --git a/external/VOCdevkit/example_action_nobb.m b/external/VOCdevkit/example_action_nobb.m new file mode 100755 index 00000000..13e5e5bc --- /dev/null +++ b/external/VOCdevkit/example_action_nobb.m @@ -0,0 +1,94 @@ +function example_action_nobb + +% change this path if you install the VOC code elsewhere +addpath([cd '/VOCcode']); + +% initialize VOC options +VOCinit; + +% train and test classifier for each action +for i=2:VOCopts.nactions % skip "other" + cls=VOCopts.actions{i}; + classifier=train(VOCopts,cls); % train classifier + test(VOCopts,cls,classifier); % test classifier + [recall,prec,ap]=VOCevalaction(VOCopts,'comp9',cls,true); % compute and display PR + + if i1 + fprintf('%s: train: %d/%d\n',cls,i,length(imgids)); + drawnow; + tic; + end + + rec=PASreadrecord(sprintf(VOCopts.annopath,imgids{i})); + obj=rec.objects(objids(i)); + + fd=extractfd(VOCopts,obj); + classifier.FD(1:length(fd),i)=fd; +end + +% run classifier on test images +function test(VOCopts,cls,classifier) + +% load test set ('val' for development kit) +[imgids,objids,gt]=textread(sprintf(VOCopts.action.clsimgsetpath,cls,VOCopts.testset),'%s %d %d'); + +% create results file +fid=fopen(sprintf(VOCopts.action.respath,'comp11',cls),'w'); + +% classify each person +tic; +for i=1:length(imgids) + % display progress + if toc>1 + fprintf('%s: test: %d/%d\n',cls,i,length(imgids)); + drawnow; + tic; + end + + rec=PASreadrecord(sprintf(VOCopts.annopath,imgids{i})); + obj=rec.objects(objids(i)); + + fd=extractfd(VOCopts,obj); + + % compute confidence of positive classification + c=classify(VOCopts,classifier,fd); + + % write to results file + fprintf(fid,'%s %d %f\n',imgids{i},objids(i),c); +end + +% close results file +fclose(fid); + +% trivial feature extractor: reference point of person in image (!) 
+function fd = extractfd(VOCopts,obj) + +fd=[obj.point.x ; obj.point.y]; + +% trivial classifier: compute ratio of L2 distance betweeen +% nearest positive (class) feature vector and nearest negative (non-class) +% feature vector +function c = classify(VOCopts,classifier,fd) + +d=sum(fd.*fd,1)+sum(classifier.FD.*classifier.FD,1)-2*fd'*classifier.FD; +dp=min(d(classifier.gt>0)); +dn=min(d(classifier.gt<0)); +c=dn/(dp+eps); diff --git a/external/VOCdevkit/example_classifier.m b/external/VOCdevkit/example_classifier.m new file mode 100755 index 00000000..bbc1cf1f --- /dev/null +++ b/external/VOCdevkit/example_classifier.m @@ -0,0 +1,107 @@ +function example_classifier + +% change this path if you install the VOC code elsewhere +addpath([cd '/VOCcode']); + +% initialize VOC options +VOCinit; + +% train and test classifier for each class +for i=1:VOCopts.nclasses + cls=VOCopts.classes{i}; + classifier=train(VOCopts,cls); % train classifier + test(VOCopts,cls,classifier); % test classifier + [recall,prec,ap]=VOCevalcls(VOCopts,'comp1',cls,true); % compute and display PR + + if i1 + fprintf('%s: train: %d/%d\n',cls,i,length(ids)); + drawnow; + tic; + end + + fdp=sprintf(VOCopts.exfdpath,ids{i}); + if exist(fdp,'file') + % load features + load(fdp,'fd'); + else + % compute and save features + I=imread(sprintf(VOCopts.imgpath,ids{i})); + fd=extractfd(VOCopts,I); + save(fdp,'fd'); + end + + classifier.FD(1:length(fd),i)=fd; +end + +% run classifier on test images +function test(VOCopts,cls,classifier) + +% load test set ('val' for development kit) +[ids,gt]=textread(sprintf(VOCopts.imgsetpath,VOCopts.testset),'%s %d'); + +% create results file +fid=fopen(sprintf(VOCopts.clsrespath,'comp1',cls),'w'); + +% classify each image +tic; +for i=1:length(ids) + % display progress + if toc>1 + fprintf('%s: test: %d/%d\n',cls,i,length(ids)); + drawnow; + tic; + end + + fdp=sprintf(VOCopts.exfdpath,ids{i}); + if exist(fdp,'file') + % load features + load(fdp,'fd'); + else + % compute and save features + I=imread(sprintf(VOCopts.imgpath,ids{i})); + fd=extractfd(VOCopts,I); + save(fdp,'fd'); + end + + % compute confidence of positive classification + c=classify(VOCopts,classifier,fd); + + % write to results file + fprintf(fid,'%s %f\n',ids{i},c); +end + +% close results file +fclose(fid); + +% trivial feature extractor: compute mean RGB +function fd = extractfd(VOCopts,I) + +fd=squeeze(sum(sum(double(I)))/(size(I,1)*size(I,2))); + +% trivial classifier: compute ratio of L2 distance betweeen +% nearest positive (class) feature vector and nearest negative (non-class) +% feature vector +function c = classify(VOCopts,classifier,fd) + +d=sum(fd.*fd)+sum(classifier.FD.*classifier.FD)-2*fd'*classifier.FD; +dp=min(d(classifier.gt>0)); +dn=min(d(classifier.gt<0)); +c=dn/(dp+eps); diff --git a/external/VOCdevkit/example_detector.m b/external/VOCdevkit/example_detector.m new file mode 100755 index 00000000..98061035 --- /dev/null +++ b/external/VOCdevkit/example_detector.m @@ -0,0 +1,164 @@ +function example_detector + +% change this path if you install the VOC code elsewhere +addpath([cd '/VOCcode']); + +% initialize VOC options +VOCinit; + +% train and test detector for each class +for i=1:VOCopts.nclasses + cls=VOCopts.classes{i}; + detector=train(VOCopts,cls); % train detector + test(VOCopts,cls,detector); % test detector + [recall,prec,ap]=VOCevaldet(VOCopts,'comp3',cls,true); % compute and display PR + + if i1 + fprintf('%s: load: %d/%d\n',cls,i,length(gtids)); + drawnow; + tic; + end + + % read annotation + 
recs(i)=PASreadrecord(sprintf(VOCopts.annopath,gtids{i})); + end + save(cp,'gtids','recs'); +end + +% extract features and bounding boxes +detector.FD=[]; +detector.bbox={}; +detector.gt=[]; +tic; +for i=1:length(gtids) + % display progress + if toc>1 + fprintf('%s: train: %d/%d\n',cls,i,length(gtids)); + drawnow; + tic; + end + + % find objects of class and extract difficult flags for these objects + clsinds=strmatch(cls,{recs(i).objects(:).class},'exact'); + diff=[recs(i).objects(clsinds).difficult]; + + % assign ground truth class to image + if isempty(clsinds) + gt=-1; % no objects of class + elseif any(~diff) + gt=1; % at least one non-difficult object of class + else + gt=0; % only difficult objects + end + + if gt + % extract features for image + fdp=sprintf(VOCopts.exfdpath,gtids{i}); + if exist(fdp,'file') + % load features + load(fdp,'fd'); + else + % compute and save features + I=imread(sprintf(VOCopts.imgpath,gtids{i})); + fd=extractfd(VOCopts,I); + save(fdp,'fd'); + end + + detector.FD(1:length(fd),end+1)=fd; + + % extract bounding boxes for non-difficult objects + + detector.bbox{end+1}=cat(1,recs(i).objects(clsinds(~diff)).bbox)'; + + % mark image as positive or negative + + detector.gt(end+1)=gt; + end +end + +% run detector on test images +function out = test(VOCopts,cls,detector) + +% load test set ('val' for development kit) +[ids,gt]=textread(sprintf(VOCopts.imgsetpath,VOCopts.testset),'%s %d'); + +% create results file +fid=fopen(sprintf(VOCopts.detrespath,'comp3',cls),'w'); + +% apply detector to each image +tic; +for i=1:length(ids) + % display progress + if toc>1 + fprintf('%s: test: %d/%d\n',cls,i,length(ids)); + drawnow; + tic; + end + + fdp=sprintf(VOCopts.exfdpath,ids{i}); + if exist(fdp,'file') + % load features + load(fdp,'fd'); + else + % compute and save features + I=imread(sprintf(VOCopts.imgpath,ids{i})); + fd=extractfd(VOCopts,I); + save(fdp,'fd'); + end + + % compute confidence of positive classification and bounding boxes + [c,BB]=detect(VOCopts,detector,fd); + + % write to results file + for j=1:length(c) + fprintf(fid,'%s %f %f %f %f %f\n',ids{i},c(j),BB(:,j)); + end +end + +% close results file +fclose(fid); + +% trivial feature extractor: compute mean RGB +function fd = extractfd(VOCopts,I) + +fd=squeeze(sum(sum(double(I)))/(size(I,1)*size(I,2))); + +% trivial detector: confidence is computed as in example_classifier, and +% bounding boxes of nearest positive training image are output +function [c,BB] = detect(VOCopts,detector,fd) + +% compute confidence +d=sum(fd.*fd)+sum(detector.FD.*detector.FD)-2*fd'*detector.FD; +dp=min(d(detector.gt>0)); +dn=min(d(detector.gt<0)); +c=dn/(dp+eps); + +% copy bounding boxes from nearest positive image +pinds=find(detector.gt>0); +[dp,di]=min(d(pinds)); +pind=pinds(di); +BB=detector.bbox{pind}; + +% replicate confidence for each detection +c=ones(size(BB,2),1)*c; \ No newline at end of file diff --git a/external/VOCdevkit/example_layout.m b/external/VOCdevkit/example_layout.m new file mode 100755 index 00000000..26d02672 --- /dev/null +++ b/external/VOCdevkit/example_layout.m @@ -0,0 +1,106 @@ +function example_layout + +% change this path if you install the VOC code elsewhere +addpath([cd '/VOCcode']); + +% initialize VOC options +VOCinit; + +% train and test layout + +gtobjects=train(VOCopts); % train layout +test(VOCopts,gtobjects); % test layout +[recall,prec,ap]=VOCevallayout_pr(VOCopts,'comp7',true); % compute and display PR + +% train: extract all person objects with parts +function objects = train(VOCopts) 
+ +% load training set +[imgids,objids]=textread(sprintf(VOCopts.layout.imgsetpath,VOCopts.trainset),'%s %d'); + +% extract objects +n=0; +tic; +for i=1:length(imgids) + % display progress + if toc>1 + fprintf('train: %d/%d\n',i,length(imgids)); + drawnow; + tic; + end + + % read annotation + rec=PASreadrecord(sprintf(VOCopts.annopath,imgids{i})); + + % extract object + n=n+1; + objects(n)=rec.objects(objids(i)); + + % move bounding box to origin + xmin=objects(n).bbox(1); + ymin=objects(n).bbox(2); + objects(n).bbox=objects(n).bbox-[xmin ymin xmin ymin]; + for j=1:numel(objects(n).part) + objects(n).part(j).bbox=objects(n).part(j).bbox-[xmin ymin xmin ymin]; + end +end + +% run layout on test images +function out = test(VOCopts,gtobjects) + +% load test set +[imgids,objids]=textread(sprintf(VOCopts.layout.imgsetpath,VOCopts.testset),'%s %d'); + +% estimate layout for each object + +GTBB=cat(1,gtobjects.bbox)'; +n=0; +tic; +for i=1:length(imgids) + % display progress + if toc>1 + fprintf('test: %d/%d\n',i,length(imgids)); + drawnow; + tic; + end + + % read annotation + rec=PASreadrecord(sprintf(VOCopts.annopath,imgids{i})); + + % extract bounding box + bb=rec.objects(objids(i)).bbox; + + % move to origin + xmin=bb(1); + ymin=bb(2); + bb=bb-[xmin ymin xmin ymin]; + + % find nearest ground truth bounding box + + d=sum(bb.*bb)+sum(GTBB.*GTBB,1)-2*bb*GTBB; + [dmin,nn]=min(d); + + % copy layout from nearest neighbour + + clear l; + l.image=imgids{i}; % image identifier + l.object=num2str(objids(i)); % object identifier + l.confidence=num2str(-dmin); % confidence + nno=gtobjects(nn); + for j=1:numel(nno.part) + l.part(j).class=nno.part(j).class; % part class + l.part(j).bndbox.xmin=num2str(nno.part(j).bbox(1)+xmin); % bounding box + l.part(j).bndbox.ymin=num2str(nno.part(j).bbox(2)+ymin); + l.part(j).bndbox.xmax=num2str(nno.part(j).bbox(3)+xmin); + l.part(j).bndbox.ymax=num2str(nno.part(j).bbox(4)+ymin); + end + + % add layout result + n=n+1; + xml.results.layout(n)=l; +end + +% write results file + +fprintf('saving results\n'); +VOCwritexml(xml,sprintf(VOCopts.layout.respath,'comp7')); diff --git a/external/VOCdevkit/example_segmenter.m b/external/VOCdevkit/example_segmenter.m new file mode 100755 index 00000000..d9315185 --- /dev/null +++ b/external/VOCdevkit/example_segmenter.m @@ -0,0 +1,16 @@ +% example_segmenter Segmentation algorithm based on detection results. +% +% This segmenter requires that some detection results are present in +% 'Results' e.g. by running 'example_detector'. +% +% Segmentations are generated from detection bounding boxes. 
+function example_segmenter + +% change this path if you install the VOC code elsewhere +addpath([cd '/VOCcode']); + +% initialize VOC options +VOCinit; + +create_segmentations_from_detections('comp3',1); +VOCevalseg(VOCopts,'comp5'); diff --git a/external/VOCdevkit/local/VOC2012/_dummy b/external/VOCdevkit/local/VOC2012/_dummy new file mode 100755 index 00000000..e69de29b diff --git a/external/VOCdevkit/results/VOC2012/Action/_dummy b/external/VOCdevkit/results/VOC2012/Action/_dummy new file mode 100755 index 00000000..e69de29b diff --git a/external/VOCdevkit/results/VOC2012/Layout/_dummy b/external/VOCdevkit/results/VOC2012/Layout/_dummy new file mode 100755 index 00000000..e69de29b diff --git a/external/VOCdevkit/results/VOC2012/Main/_dummy b/external/VOCdevkit/results/VOC2012/Main/_dummy new file mode 100755 index 00000000..e69de29b diff --git a/external/VOCdevkit/results/VOC2012/Segmentation/_dummy b/external/VOCdevkit/results/VOC2012/Segmentation/_dummy new file mode 100755 index 00000000..e69de29b diff --git a/external/VOCdevkit/viewanno.m b/external/VOCdevkit/viewanno.m new file mode 100755 index 00000000..794923c3 --- /dev/null +++ b/external/VOCdevkit/viewanno.m @@ -0,0 +1,103 @@ +function viewanno(imgset) + +if nargin<1 + error(['usage: viewanno(imgset) e.g. viewanno(' 39 'Main/train' 39 ') ' ... + 'or viewanno(' 39 'Main/car_train' 39 ')']); +end + +% change this path if you install the VOC code elsewhere +addpath([cd '/VOCcode']); + +% initialize VOC options +VOCinit; + +% load image set +[ids,gt]=textread(sprintf(VOCopts.imgsetpath,['../' imgset]),'%s %d'); + +for i=1:length(ids) + + % read annotation + rec=PASreadrecord(sprintf(VOCopts.annopath,ids{i})); + + % read image + I=imread(sprintf(VOCopts.imgpath,ids{i})); + + if rec.segmented + + % read segmentations + + [Sclass,CMclass]=imread(sprintf(VOCopts.seg.clsimgpath,ids{i})); + [Sobj,CMobj]=imread(sprintf(VOCopts.seg.instimgpath,ids{i})); + end + + % display annotation + + if rec.segmented + subplot(311); + else + clf; + end + + imagesc(I); + hold on; + for j=1:length(rec.objects) + bb=rec.objects(j).bbox; + lbl=rec.objects(j).class; + if rec.objects(j).difficult + ls='r'; % "difficult": red + else + ls='g'; % not "difficult": green + end + if rec.objects(j).truncated + lbl=[lbl 'T']; + end + if rec.objects(j).occluded + lbl=[lbl 'O']; + end + plot(bb([1 3 3 1 1]),bb([2 2 4 4 2]),ls,'linewidth',2); + text(bb(1),bb(2),lbl,'color','k','backgroundcolor',ls(1),... + 'verticalalignment','top','horizontalalignment','left','fontsize',8); + + if isfield(rec.objects(j),'actions') + albl=''; + for k=1:VOCopts.nactions + if rec.objects(j).actions.(VOCopts.actions{k}) + if ~isempty(albl) + albl=[albl '+']; + end + albl=[albl VOCopts.actions{k}]; + end + end + text(bb(3),bb(4),albl,'color','k','backgroundcolor',ls(1),... + 'verticalalignment','bottom','horizontalalignment','right','fontsize',8); + end + + for k=1:length(rec.objects(j).part) + bb=rec.objects(j).part(k).bbox; + plot(bb([1 3 3 1 1]),bb([2 2 4 4 2]),[ls ':'],'linewidth',2); + text(bb(1),bb(2),rec.objects(j).part(k).class,'color','k','backgroundcolor',ls(1),... + 'verticalalignment','top','horizontalalignment','left','fontsize',8); + end + end + hold off; + axis image off; + title(sprintf('image: %d/%d: "%s" (red=difficult, T=truncated, O=occluded)',... 
+ i,length(ids),ids{i}),'interpreter','none'); + + if rec.segmented + subplot(312); + imshow(Sclass,CMclass); + axis image; + axis off; + title('segmentation by class'); + + subplot(313); + imshow(Sobj,CMobj); + axis image; + axis off; + title('segmentation by object'); + end + + fprintf('press any key to continue with next image\n'); + pause; +end diff --git a/external/VOCdevkit/viewdet.m b/external/VOCdevkit/viewdet.m new file mode 100755 index 00000000..6df21788 --- /dev/null +++ b/external/VOCdevkit/viewdet.m @@ -0,0 +1,129 @@ +function viewdet(id,cls,onlytp) + +if nargin<2 + error(['usage: viewdet(competition,class,onlytp) e.g. viewdet(''comp3'',''car'') or ' ... + 'viewdet(''comp3'',''car'',true) to show true positives']); +end + +if nargin<3 + onlytp=false; +end + +% change this path if you install the VOC code elsewhere +addpath([cd '/VOCcode']); + +% initialize VOC options +VOCinit; + +% load test set +cp=sprintf(VOCopts.annocachepath,VOCopts.testset); +if exist(cp,'file') + fprintf('%s: loading test set\n',cls); + load(cp,'gtids','recs'); +else + gtids=textread(sprintf(VOCopts.imgsetpath,VOCopts.testset),'%s'); + for i=1:length(gtids) + % display progress + if toc>1 + fprintf('%s: load: %d/%d\n',cls,i,length(gtids)); + drawnow; + tic; + end + + % read annotation + recs(i)=PASreadrecord(sprintf(VOCopts.annopath,gtids{i})); + end + save(cp,'gtids','recs'); +end + +% extract ground truth objects + +npos=0; +gt(length(gtids))=struct('BB',[],'diff',[],'det',[]); +for i=1:length(gtids) + % extract objects of class + clsinds=strmatch(cls,{recs(i).objects(:).class},'exact'); + gt(i).BB=cat(1,recs(i).objects(clsinds).bbox)'; + gt(i).diff=[recs(i).objects(clsinds).difficult]; + gt(i).det=false(length(clsinds),1); + npos=npos+sum(~gt(i).diff); +end + +% load results +[ids,confidence,b1,b2,b3,b4]=textread(sprintf(VOCopts.detrespath,id,cls),'%s %f %f %f %f %f'); +BB=[b1 b2 b3 b4]'; + +% sort detections by decreasing confidence +[sc,si]=sort(-confidence); +ids=ids(si); +BB=BB(:,si); + +% view detections + +clf; +nd=length(confidence); +tic; +for d=1:nd + % display progress + if onlytp&toc>1 + fprintf('%s: viewdet: find true pos: %d/%d\n',cls,i,length(gtids)); + drawnow; + tic; + end + + % find ground truth image + i=strmatch(ids{d},gtids,'exact'); + if isempty(i) + error('unrecognized image "%s"',ids{d}); + elseif length(i)>1 + error('multiple image "%s"',ids{d}); + end + + % assign detection to ground truth object if any + bb=BB(:,d); + ovmax=-inf; + for j=1:size(gt(i).BB,2) + bbgt=gt(i).BB(:,j); + bi=[max(bb(1),bbgt(1)) ; max(bb(2),bbgt(2)) ; min(bb(3),bbgt(3)) ; min(bb(4),bbgt(4))]; + iw=bi(3)-bi(1)+1; + ih=bi(4)-bi(2)+1; + if iw>0 & ih>0 + % compute overlap as area of intersection / area of union + ua=(bb(3)-bb(1)+1)*(bb(4)-bb(2)+1)+... + (bbgt(3)-bbgt(1)+1)*(bbgt(4)-bbgt(2)+1)-... + iw*ih; + ov=iw*ih/ua; + if ov>ovmax + ovmax=ov; + jmax=j; + end + end + end + + % skip false positives + if onlytp&ovmax=VOCopts.minoverlap + bbgt=gt(i).BB(:,jmax); + plot(bbgt([1 3 3 1 1]),bbgt([2 2 4 4 2]),'y-','linewidth',2); + plot(bb([1 3 3 1 1]),bb([2 2 4 4 2]),'g:','linewidth',2); + else + plot(bb([1 3 3 1 1]),bb([2 2 4 4 2]),'r-','linewidth',2); + end + hold off; + axis image; + axis off; + title(sprintf('det %d/%d: image: "%s" (green=true pos,red=false pos,yellow=ground truth',... + d,nd,gtids{i}),'interpreter','none'); + + fprintf('press any key to continue with next image\n'); + pause; +end
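For reference, the bounding-box overlap test shared by VOCevaldet, VOCevallayout_pr and viewdet above reduces to the following self-contained computation (toy boxes in [xmin ymin xmax ymax] form; the +1 terms reflect inclusive pixel coordinates):

% Intersection-over-union of a detection box bb and a ground-truth box bbgt.
bb   = [10 10 50 50];                 % detection
bbgt = [30 30 70 70];                 % ground truth
bi = [max(bb(1),bbgt(1)) max(bb(2),bbgt(2)) min(bb(3),bbgt(3)) min(bb(4),bbgt(4))];
iw = bi(3)-bi(1)+1; ih = bi(4)-bi(2)+1;
ov = 0;
if iw>0 && ih>0
  ua = (bb(3)-bb(1)+1)*(bb(4)-bb(2)+1) + ...
       (bbgt(3)-bbgt(1)+1)*(bbgt(4)-bbgt(2)+1) - iw*ih;
  ov = iw*ih/ua;                      % here ov is roughly 0.15
end
accept = ov >= 0.5;                   % compare against VOCopts.minoverlap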