Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding camvid dataset #402

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions fuel/converters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from fuel.converters import adult
from fuel.converters import binarized_mnist
from fuel.converters import caltech101_silhouettes
from fuel.converters import camvid
from fuel.converters import celeba
from fuel.converters import cifar10
from fuel.converters import cifar100
Expand All @@ -29,6 +30,7 @@
('adult', adult.fill_subparser),
('binarized_mnist', binarized_mnist.fill_subparser),
('caltech101_silhouettes', caltech101_silhouettes.fill_subparser),
('camvid', camvid.fill_subparser),
('celeba', celeba.fill_subparser),
('cifar10', cifar10.fill_subparser),
('cifar100', cifar100.fill_subparser),
Expand Down
109 changes: 109 additions & 0 deletions fuel/converters/camvid.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
import os
import zipfile

import h5py
import numpy
from six.moves import range
from PIL import Image

from fuel.converters.base import check_exists, progress_bar
from fuel.datasets import H5PYDataset

# Archive holding the images, and the default name of the converted file.
DATASET_FILE = 'camvid_dataset.zip'
OUTPUT_FILENAME = 'camvid.hdf5'

# Text files listing one "<image path> <target path>" pair per line,
# one file per split.
ATTRIBUTES_FILE_TRAIN = 'train.txt'
ATTRIBUTES_FILE_VALID = 'val.txt'
ATTRIBUTES_FILE_TEST = 'test.txt'
DATASET_FILES = [
    ATTRIBUTES_FILE_TRAIN,
    ATTRIBUTES_FILE_VALID,
    ATTRIBUTES_FILE_TEST,
]

# Split boundaries as row indices into the converted datasets:
# train is [0, TRAIN_STOP), valid is [TRAIN_STOP, VALID_STOP) and
# test is [VALID_STOP, NUM_EXAMPLES).
TRAIN_STOP = 367
VALID_STOP = 468
NUM_EXAMPLES = 701


def _initialize_conversion(directory, output_path, image_shape):
    """Create the output HDF5 file with empty, labelled datasets.

    Parameters
    ----------
    directory : str
        Directory in which input files reside. Not used by this helper.
    output_path : str
        Path of the HDF5 file to create; it is opened in write mode.
    image_shape : tuple of int
        (height, width) of every image; appended to the leading
        dimensions of both datasets.

    Returns
    -------
    h5file : :class:`h5py.File`
        The open file, holding a 'split' attribute and uint8 'targets'
        and 'features' datasets sized for `NUM_EXAMPLES` examples.

    """
    # Features and targets share the same row interval within each split.
    intervals = (
        ('train', (0, TRAIN_STOP)),
        ('valid', (TRAIN_STOP, VALID_STOP)),
        ('test', (VALID_STOP, NUM_EXAMPLES)),
    )
    split_dict = {
        split: {'features': interval, 'targets': interval}
        for split, interval in intervals}

    h5file = h5py.File(output_path, mode='w')
    h5file.attrs['split'] = H5PYDataset.create_split_array(split_dict)

    # Dataset name, full shape and per-axis labels, in creation order.
    layouts = (
        ('targets', (NUM_EXAMPLES,) + image_shape,
         ('batch', 'height', 'width')),
        ('features', (NUM_EXAMPLES, 3) + image_shape,
         ('batch', 'channel', 'height', 'width')),
    )
    for name, shape, labels in layouts:
        dataset = h5file.create_dataset(name, shape, dtype='uint8')
        for axis, label in enumerate(labels):
            dataset.dims[axis].label = label

    return h5file

@check_exists(required_files=DATASET_FILES)
def convert_camvid(directory, output_directory,
                   output_filename='camvid.hdf5'):
    """Converts the CamVid dataset to HDF5.

    Converts the CamVid dataset to an HDF5 dataset compatible with
    :class:`fuel.datasets.Camvid`. The converted dataset is saved as
    'camvid.hdf5' unless `output_filename` says otherwise.

    The list files in `DATASET_FILES` are read in order (train, valid,
    test) and their examples are written to consecutive rows, so that
    the rows line up with the split boundaries stored in the file.

    Parameters
    ----------
    directory : str
        Directory in which input files reside.
    output_directory : str
        Directory in which to save the converted dataset.
    output_filename : str, optional
        Name of the saved dataset. Defaults to 'camvid.hdf5'.

    Returns
    -------
    output_paths : tuple of str
        Single-element tuple containing the path to the converted dataset.

    """
    output_path = os.path.join(output_directory, output_filename)
    h5file = _initialize_conversion(directory, output_path, (360, 480))
    image_file_path = os.path.join(directory, DATASET_FILE)

    try:
        features_dataset = h5file['features']
        targets_dataset = h5file['targets']
        # Next row to fill. It keeps counting across the list files so
        # the valid and test examples land after the training examples
        # instead of overwriting them.
        row = 0
        # NOTE(review): the archive is opened (so a missing or invalid
        # zip still raises) but nothing is read from it; images are
        # loaded from the file system. Confirm whether the images were
        # meant to be read from the archive instead.
        with zipfile.ZipFile(image_file_path, 'r'):
            with progress_bar('images', NUM_EXAMPLES) as bar:
                for list_name in DATASET_FILES:
                    # `check_exists` looks for the list files in
                    # `directory`, so read them from there as well.
                    list_path = os.path.join(directory, list_name)
                    with open(list_path, 'r') as list_file:
                        for line in list_file:
                            if not line.strip():
                                # Tolerate blank (e.g. trailing) lines.
                                continue
                            image_name, target_name = line.split()
                            # The first 15 characters of each listed
                            # path are dropped -- presumably the
                            # '/SegNet/CamVid/' prefix of the SegNet
                            # list files; TODO confirm.
                            image = Image.open(
                                os.path.join(directory, image_name[15:]),
                                'r')
                            target = Image.open(
                                os.path.join(directory, target_name[15:]),
                                'r')
                            # (height, width, channel) ->
                            # (channel, height, width)
                            features_dataset[row] = numpy.asarray(
                                image).transpose(2, 0, 1)
                            targets_dataset[row] = numpy.asarray(target)
                            row += 1
                            bar.update(row)
        h5file.flush()
    finally:
        # Release the HDF5 handle even if the conversion fails midway.
        h5file.close()

    return (output_path,)


def fill_subparser(subparser):
    """Set up the subparser that converts the CamVid dataset files.

    No command-line arguments are registered on the subparser.

    Parameters
    ----------
    subparser : :class:`argparse.ArgumentParser`
        Subparser handling the `camvid` command.

    Returns
    -------
    converter : callable
        The :func:`convert_camvid` function.

    """
    converter = convert_camvid
    return converter
1 change: 1 addition & 0 deletions fuel/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from fuel.datasets.hdf5 import H5PYDataset
from fuel.datasets.adult import Adult
from fuel.datasets.binarized_mnist import BinarizedMNIST
from fuel.datasets.camvid import Camvid
from fuel.datasets.celeba import CelebA
from fuel.datasets.cifar10 import CIFAR10
from fuel.datasets.cifar100 import CIFAR100
Expand Down
35 changes: 35 additions & 0 deletions fuel/datasets/camvid.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from fuel.datasets import H5PYDataset
from fuel.transformers.defaults import uint8_pixels_to_floatX
from fuel.utils import find_in_data_path


class Camvid(H5PYDataset):
    """The CamVid motion based segmentation dataset.

    The Cambridge-driving Labeled Video Database (CamVid) [Camvid1]_
    provides high-quality videos acquired at 30 Hz with the
    corresponding semantically labeled masks at 1 Hz and in part,
    15 Hz. The ground truth labels associate each pixel with one of 32
    semantic classes.

    This loader is intended for the SegNet version of the CamVid
    dataset, that resizes the original data to 360 by 480 resolution
    and remaps the ground truth to a subset of 11 semantic classes,
    plus a void class.

    The dataset should be downloaded from [Camvid2]_.

    Parameters
    ----------
    which_sets : tuple of str
        Which split(s) to load, chosen among 'train', 'valid' and
        'test', e.g. ``('train',)``.
    \*\*kwargs
        Forwarded unchanged to :class:`H5PYDataset`.

    References
    ----------
    .. [Camvid1] http://mi.eng.cam.ac.uk/research/projects/VideoRec/CamVid/
    .. [Camvid2]
       https://github.com/alexgkendall/SegNet-Tutorial/tree/master/CamVid

    """

    # Name of the converted HDF5 file, looked up in Fuel's data path.
    filename = 'camvid.hdf5'
    # Default transformers, declared for the 'features' source only.
    default_transformers = uint8_pixels_to_floatX(('features',))

    def __init__(self, which_sets, **kwargs):
        hdf5_path = find_in_data_path(self.filename)
        super(Camvid, self).__init__(
            file_or_path=hdf5_path, which_sets=which_sets, **kwargs)
2 changes: 2 additions & 0 deletions fuel/downloaders/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from fuel.downloaders import adult
from fuel.downloaders import binarized_mnist
from fuel.downloaders import caltech101_silhouettes
from fuel.downloaders import camvid
from fuel.downloaders import celeba
from fuel.downloaders import cifar10
from fuel.downloaders import cifar100
Expand All @@ -24,6 +25,7 @@
('adult', adult.fill_subparser),
('binarized_mnist', binarized_mnist.fill_subparser),
('caltech101_silhouettes', caltech101_silhouettes.fill_subparser),
('camvid', camvid.fill_subparser),
('celeba', celeba.fill_subparser),
('cifar10', cifar10.fill_subparser),
('cifar100', cifar100.fill_subparser),
Expand Down
15 changes: 15 additions & 0 deletions fuel/downloaders/camvid.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from fuel.downloaders.base import default_downloader


def fill_subparser(subparser):
    """Set up the subparser that downloads the CamVid dataset file.

    Stores the download URLs and target filenames as defaults on the
    subparser.

    Parameters
    ----------
    subparser : :class:`argparse.ArgumentParser`
        Subparser handling the `camvid` command.

    Returns
    -------
    downloader : callable
        The `default_downloader` function.

    """
    # TODO: the URL is still a placeholder; the download cannot work
    # until the real archive location is filled in.
    defaults = {
        'urls': ['To be definied'],
        'filenames': ['camvid_dataset.zip'],
    }
    subparser.set_defaults(**defaults)
    return default_downloader
29 changes: 29 additions & 0 deletions tests/test_camvid.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import os

import h5py
import numpy
from numpy.testing import assert_equal

from fuel import config
from fuel.datasets import H5PYDataset, Camvid


def test_camvid():
    """Check that `Camvid` loads a small stand-in 'camvid.hdf5'.

    A ten-example file with the expected dataset names, shapes and
    split attribute is written to the current directory, the data path
    is pointed at it, and the training split is instantiated.
    """
    saved_data_path = config.data_path
    num_examples, height, width = 10, 360, 480
    # Row intervals shared by both sources within each split.
    intervals = (('train', (0, 6)), ('valid', (6, 8)), ('test', (8, 10)))
    try:
        config.data_path = '.'
        h5file = h5py.File('camvid.hdf5', 'w')
        h5file['features'] = numpy.arange(
            num_examples * 3 * height * width,
            dtype='uint8').reshape((num_examples, 3, height, width))
        h5file['targets'] = numpy.arange(
            num_examples * height * width,
            dtype='uint8').reshape((num_examples, height, width))
        split_dict = {
            split: {'features': interval, 'targets': interval}
            for split, interval in intervals}
        h5file.attrs['split'] = H5PYDataset.create_split_array(split_dict)
        h5file.close()
        dataset = Camvid(which_sets=('train',))
        assert_equal(dataset.filename, 'camvid.hdf5')
    finally:
        # Restore the global configuration and remove the fixture file.
        config.data_path = saved_data_path
        os.remove('camvid.hdf5')