diff --git a/hela/__init__.py b/hela/__init__.py new file mode 100644 index 0000000..844990f --- /dev/null +++ b/hela/__init__.py @@ -0,0 +1,8 @@ + +from .dataset import * +from .feature_importance import * +from .models import * +from .posterior import * +from .plot import * +from .utils import * +from .wpercentile import * diff --git a/dataset.py b/hela/dataset.py similarity index 65% rename from dataset.py rename to hela/dataset.py index 727db15..5be73a5 100644 --- a/dataset.py +++ b/hela/dataset.py @@ -8,8 +8,7 @@ __all__ = ["Dataset", "load_dataset", "load_data_file"] -logger = logging.getLogger(__name__) - +LOGGER = logging.getLogger(__name__) Dataset = namedtuple("Dataset", ["training_x", "training_y", "testing_x", "testing_y", @@ -17,39 +16,40 @@ def load_data_file(data_file, num_features): - + data = np.load(data_file) - + if data.ndim == 1: data = data[None, :] - + x = data[:, :num_features] y = data[:, num_features:] - + return x, y -def load_dataset(dataset_file): - +def load_dataset(dataset_file, load_testing_data=True): + with open(dataset_file, "r") as f: dataset_info = json.load(f) - + metadata = dataset_info["metadata"] - + base_path = os.path.dirname(dataset_file) - + # Load training data - training_file = os.path.join(base_path, dataset_info["training_data"]) - logger.debug("Loading training data from '{}'...".format(training_file)) - training_x, training_y = load_data_file(training_file, metadata["num_features"]) - + training_file = os.path.join(base_path, dataset_info["training_data"]) + LOGGER.debug("Loading training data from '%s'...", training_file) + training_x, training_y = load_data_file(training_file, + metadata["num_features"]) + # Optionally, load testing data testing_x, testing_y = None, None - if dataset_info["testing_data"] is not None: + if load_testing_data and dataset_info["testing_data"] is not None: testing_file = os.path.join(base_path, dataset_info["testing_data"]) - logger.debug("Loading testing data from '{}'...".format(testing_file)) - testing_x, testing_y = load_data_file(testing_file, metadata["num_features"]) - + LOGGER.debug("Loading testing data from '%s'...", testing_file) + testing_x, testing_y = load_data_file(testing_file, + metadata["num_features"]) + return Dataset(training_x, training_y, testing_x, testing_y, metadata["names"], metadata["ranges"], metadata["colors"]) - diff --git a/hela/feature_importance.py b/hela/feature_importance.py new file mode 100644 index 0000000..1f1537e --- /dev/null +++ b/hela/feature_importance.py @@ -0,0 +1,183 @@ + +from collections import namedtuple +from multiprocessing import Pool +from typing import Optional, Tuple +import logging + +import numpy as np + +from sklearn.ensemble import RandomForestRegressor +from sklearn.tree import DecisionTreeRegressor + +from .models import _tree_weights +from .utils import tqdm + +__all__ = [ + 'importances_per_output' +] + +LOGGER = logging.getLogger(__name__) + + +def importances_per_output( + forest: RandomForestRegressor, + x: np.ndarray, + y: np.ndarray + ) -> np.ndarray: + """ + Compute the feature importances with a breakdown per output variable (i.e., + per label). + + Parameters + ---------- + forest : RandomForestRegressor + A trained random forest from which feature importances will be + extracted. + x : array-like of shape (n_samples, n_features) + The input samples of the training dataset used to train `forest`. 
+ y : array-like of shape (n_samples, n_outputs) + The output samples (labels) used to train `forest`. + + Returns + ------- + importances : np.ndarray of shape (n_features, n_outputs) + The feature importances split according to the contributions to each + output variable. Summing this array over its columns (i.e., over the + output variables) gives the global feature importances contained in + forest.feature_importances_. + """ + + impurities = impurities_per_output(forest, x, y) + return importances_from_impurities(forest, impurities) + + +def importances_from_impurities( + forest: RandomForestRegressor, + impurities: np.ndarray + ) -> np.ndarray: + + LOGGER.info("Computing importances from impurities...") + with Pool(forest.n_jobs) as pool: + importances_ = list(tqdm( + pool.imap(_tree_importances_per_output, + zip(forest, impurities)), + total=len(forest) + )) + + importances = np.mean(importances_, axis=0) + + return importances / importances.sum() + + +def _tree_importances_per_output( + args: Tuple[DecisionTreeRegressor, np.ndarray] + ) -> np.ndarray: + + tree, impurities = args + + tree_ = tree.tree_ + importances = np.zeros((tree_.n_features, tree_.n_outputs)) + + Node = namedtuple( + "Node", + ['left', 'right', 'weighted_n_node_samples', 'feature', 'impurities'] + ) + + for node in zip(tree_.children_left, tree_.children_right, + tree_.weighted_n_node_samples, tree_.feature, impurities): + node = Node(*node) + if node.left == -1: + continue + + left = Node( + None, None, + tree_.weighted_n_node_samples[node.left], + tree_.feature[node.left], + impurities[node.left] + ) + right = Node( + None, None, + tree_.weighted_n_node_samples[node.right], + tree_.feature[node.right], impurities[node.right] + ) + + importances[node.feature] += ( + node.weighted_n_node_samples * node.impurities - + left.weighted_n_node_samples * left.impurities - + right.weighted_n_node_samples * right.impurities + ) + + return importances / importances.sum() + + +# Global variables to avoid pickling very large arrays with multiprocessing +_X = None +_Y = None + + +def impurities_per_output( + forest: RandomForestRegressor, + x: np.ndarray, + y: np.ndarray + ) -> np.ndarray: + + LOGGER.info("Computing impurities...") + global _X + global _Y + _X = x + _Y = y + with Pool(forest.n_jobs) as pool: + impurities = list(tqdm( + pool.imap(_tree_impurities_per_output, forest), + total=len(forest) + )) + + return np.array(impurities) + + +def _tree_impurities_per_output(tree: DecisionTreeRegressor) -> np.ndarray: + + assert _X is not None and _Y is not None + + tree_ = tree.tree_ + x, y = _X, _Y + + impurities = np.zeros((tree.tree_.node_count, y.shape[1])) + weights = _tree_weights(tree, len(x)) + + def __tree_impurities_per_output(node_idx, x, y, weights): + + imp = _wvariance(y, weights, axis=0) + impurities[node_idx] = imp + + if tree_.children_left[node_idx] == -1: + return + + feature = tree_.feature[node_idx] + threshold = tree_.threshold[node_idx] + mask = x[:, feature] <= threshold + __tree_impurities_per_output( + tree_.children_left[node_idx], + x[mask], + y[mask], + weights[mask] + ) + mask = np.logical_not(mask) + __tree_impurities_per_output( + tree_.children_right[node_idx], + x[mask], + y[mask], + weights[mask] + ) + + __tree_impurities_per_output(0, x, y, weights) + return impurities + + +def _wvariance( + data: np.ndarray, + weights: np.ndarray, + axis: Optional[int] = None + ) -> np.ndarray: + + avg = np.average(data, weights=weights, axis=axis) + return np.average((data - avg)**2, weights=weights, axis=axis) diff --git a/hela/models.py b/hela/models.py new file mode 100644
index 0000000..bd0c8b8 --- /dev/null +++ b/hela/models.py @@ -0,0 +1,288 @@ + +import logging +from typing import Optional, Union, Dict, Iterable, List + +import numpy as np + +from sklearn import ensemble +from sklearn.base import BaseEstimator +from sklearn.utils import check_random_state +from sklearn.preprocessing import MinMaxScaler + +from .utils import tqdm +from .posterior import Posterior, posterior_percentile + +__all__ = [ + "Model", +] + +LOGGER = logging.getLogger(__name__) + + +class Model(BaseEstimator): + + def __init__( + self, + n_estimators: int = 1000, + criterion: str = 'mse', + max_features: Union[str, int, float] = 'sqrt', + min_impurity_decrease: float = 0.0, + bootstrap: bool = True, + n_jobs: Optional[int] = None, + random_state: Union[int, np.random.RandomState, None] = None, + verbose: int = 0, + max_samples: Union[float, int, None] = None, + enable_posterior: bool = True + ): + + scaler = MinMaxScaler(feature_range=(0, 100)) + self.random_forest = ensemble.RandomForestRegressor( + n_estimators=n_estimators, + criterion=criterion, + max_features=max_features, + min_impurity_decrease=min_impurity_decrease, + bootstrap=bootstrap, + oob_score=bootstrap, # the OOB score is reported by rfretrieval.main_train + n_jobs=n_jobs, + random_state=random_state, + verbose=verbose, + max_samples=max_samples + ) + + # Store the constructor arguments as attributes so that + # BaseEstimator.get_params() and sklearn.base.clone() work, e.g. when + # this model is wrapped in a MultiOutputRegressor. + self.n_estimators = n_estimators + self.criterion = criterion + self.max_features = max_features + self.min_impurity_decrease = min_impurity_decrease + self.bootstrap = bootstrap + self.n_jobs = n_jobs + self.random_state = random_state + self.verbose = verbose + self.max_samples = max_samples + + self.scaler = scaler + + # To compute the posteriors + self.enable_posterior = enable_posterior + self.data_leaves = None + self.data_weights = None + self.data_y = None + + def _scaler_fit(self, y): + if y.ndim == 1: + y = y[:, None] + + self.scaler.fit(y) + + def scaler_transform(self, y: np.ndarray) -> np.ndarray: + if y.ndim == 1: + y = y[:, None] + return self.scaler.transform(y)[:, 0] + + return self.scaler.transform(y) + + def scaler_inverse_transform(self, y: np.ndarray) -> np.ndarray: + + if y.ndim == 1: + y = y[:, None] + # return self.scaler.inverse_transform(y)[:, 0] + + return self.scaler.inverse_transform(y) + + def fit(self, x: np.ndarray, y: np.ndarray) -> None: + self._scaler_fit(y) + self.random_forest.fit(x, self.scaler_transform(y)) + + # Build the structures to quickly compute the posteriors + if self.enable_posterior: + data_leaves = self.random_forest.apply(x).T + self.data_leaves = _as_smallest_udtype(data_leaves) + self.data_weights = np.array( + [_tree_weights(tree, len(y)) for tree in self.random_forest] + ) + self.data_y = y + + def predict(self, x: np.ndarray) -> np.ndarray: + pred = self.random_forest.predict(x) + return self.scaler_inverse_transform(pred) + + def predict_median( + self, + x: np.ndarray, + prior_samples: Optional[np.ndarray] = None + ) -> np.ndarray: + + return self.predict_percentile(x, 50, prior_samples) + + def predict_percentile( + self, + x: np.ndarray, + percentile: Union[float, Iterable[float]], + prior_samples: Optional[np.ndarray] = None + ) -> np.ndarray: + """ + Equivalent to computing the posterior for each element of `x` and then + calling `posterior_percentile`. However, this is usually more efficient + than the naive approach when `x` has many elements. + """ + + if not self.enable_posterior: + raise ValueError("Cannot compute posteriors with this model. 
" + "Set `enable_posterior` to True to enable " + "posterior computation.") + + # Find the leaves for the query points + leaves_x = self.random_forest.apply(x) + + if prior_samples is None: + prior_samples = self.data_y + + if len(x) > self.random_forest.n_estimators: + # If there are many queries, + # it is faster to find points using a cache + return _posterior_percentile_cache( + self.data_leaves, self.data_weights, + prior_samples, leaves_x, percentile + ) + + # For few queries, it is faster if we just compute the posterior + # for each element + return _posterior_percentile_nocache( + self.data_leaves, self.data_weights, + prior_samples, leaves_x, percentile + ) + + def posterior( + self, + x: np.ndarray, + prior_samples: Optional[np.ndarray] = None + ) -> Posterior: + + if not self.enable_posterior: + raise ValueError("Cannot compute posteriors with this model. " + "Set `enable_posterior` to True to enable " + "posterior computation.") + + if x.ndim > 1: + raise ValueError("x.ndim must be 1") + + leaves_x = self.random_forest.apply(x[None, :])[0] + + if prior_samples is None: + prior_samples = self.data_y + + return _posterior( + self.data_leaves, self.data_weights, + prior_samples, leaves_x + ) + + +def _posterior( + data_leaves: np.ndarray, + data_weights: np.ndarray, + data_y: np.ndarray, + query_leaves: np.ndarray + ) -> Posterior: + + weights_x = (query_leaves[:, None] == data_leaves) * data_weights + weights_x = weights_x.sum(0) + + # Remove samples with weight zero + mask = weights_x != 0 + samples = data_y[mask] + weights = weights_x[mask].astype(np.int_) + + return Posterior(samples, weights) + + +def _posterior_percentile_nocache( + data_leaves: np.ndarray, + data_weights: np.ndarray, + prior_samples: np.ndarray, + query_leaves: np.ndarray, + percentile: Union[float, Iterable[float]] + ) -> np.ndarray: + + values = [] + + LOGGER.info("Computing percentiles...") + # This can be parallelized with multiprocessing. + for leaves_x_i in tqdm(query_leaves): + posterior = _posterior( + data_leaves, data_weights, + prior_samples, leaves_x_i + ) + value = posterior_percentile(posterior, percentile) + values.append(value) + + return np.array(values) + + +def _posterior_percentile_cache( + data_leaves: np.ndarray, + data_weights: np.ndarray, + prior_samples: np.ndarray, + query_leaves: np.ndarray, + percentile: Union[float, Iterable[float]] + ) -> np.ndarray: + + # Build a dictionary for fast access of the contents of the leaves. + LOGGER.info("Building cache...") + cache = [ + _build_leaves_cache(leaves_i, weights_i) + for leaves_i, weights_i in zip(data_leaves, data_weights) + ] + + values = [] + # Check the contents of the leaves in leaves_x + LOGGER.info("Computing percentiles...") + # This can be parallelized with multiprocessing. 
+ for leaves_x_i in tqdm(query_leaves): + indices: List[int] = [] + weights: List[int] = [] + for tree, leaves_x_i_j in enumerate(leaves_x_i): + cur_indices = cache[tree][0][leaves_x_i_j] + cur_weights = cache[tree][1][leaves_x_i_j] + indices.extend(cur_indices) + weights.extend(cur_weights) + + posterior = Posterior(prior_samples[indices], weights) + value = posterior_percentile(posterior, percentile) + values.append(value) + + return np.array(values) + + +def _build_leaves_cache(data_leaves, data_weights): + + indices: Dict[int, List[int]] = {} + weights: Dict[int, List[int]] = {} + + for index, (leaf, weight) in enumerate(zip(data_leaves, data_weights)): + if weight == 0: + continue + + if leaf not in indices: + indices[leaf] = [index] + weights[leaf] = [weight] + else: + indices[leaf].append(index) + weights[leaf].append(weight) + + return indices, weights + + +def _generate_sample_indices(random_state, n_samples): + random_instance = check_random_state(random_state) + sample_indices = random_instance.randint(0, n_samples, n_samples) + + return sample_indices + + +def _tree_weights(tree, n_samples): + indices = _generate_sample_indices(tree.random_state, n_samples) + res = np.bincount(indices, minlength=n_samples) + return _as_smallest_udtype(res) + + +def _as_smallest_udtype(arr): + return arr.astype(_smallest_udtype(arr.max())) + + +def _smallest_udtype(value): + + dtypes = [np.uint8, np.uint16, np.uint32, np.uint64] + + for dtype in dtypes: + if value <= np.iinfo(dtype).max: + return dtype + + raise ValueError("value is too large for any dtype") diff --git a/plot.py b/hela/plot.py similarity index 93% rename from plot.py rename to hela/plot.py index a3434f2..e72c107 100644 --- a/plot.py +++ b/hela/plot.py @@ -9,19 +9,15 @@ from sklearn import metrics, neighbors from sklearn.preprocessing import MinMaxScaler -from models import resample_posterior -from wpercentile import wmedian - -try: - from tqdm import tqdm -except ImportError: - def tqdm(x, *_, **__): - return x +from .posterior import resample_posterior +from .wpercentile import wmedian +from .utils import tqdm __all__ = [ - "predicted_vs_real", - "feature_importances", - "posterior_matrix" + 'predicted_vs_real', + 'feature_importances', + 'stacked_feature_importances', + 'posterior_matrix' ] POSTERIOR_MAX_SIZE = 10000 @@ -96,6 +92,26 @@ def feature_importances(forests, names, colors): return fig +def stacked_feature_importances(importances, names, colors): + + bottoms = np.zeros(importances.shape[0]) + + ind = np.arange(len(importances)) + + fig = plt.figure() + ax = fig.gca() + for data_i, name_i, color_i in zip(importances.T, names, colors): + ax.bar(ind, data_i, bottom=bottoms, color=color_i, label=name_i) + bottoms += data_i + + ax.set_xlabel("Feature index", fontsize=18) + ax.legend(fontsize=16) + ax.grid() + + fig.tight_layout() + return fig + + def posterior_matrix(posterior, names, ranges, colors, soft_colors=None): samples, weights = posterior diff --git a/hela/posterior.py b/hela/posterior.py new file mode 100644 index 0000000..1302d19 --- /dev/null +++ b/hela/posterior.py @@ -0,0 +1,40 @@ + +from collections import namedtuple +import logging +from typing import Union, Iterable + +import numpy as np + +from .wpercentile import wpercentile + +__all__ = [ + "Posterior", + "resample_posterior", + "posterior_percentile" +] + +LOGGER = logging.getLogger(__name__) + +# Posteriors are represented as a collection of weighted samples +Posterior = namedtuple("Posterior", ["samples", "weights"]) + + +def 
resample_posterior(posterior: Posterior, num_draws: int) -> Posterior: + + p = posterior.weights / posterior.weights.sum() + indices = np.random.choice(len(posterior.samples), size=num_draws, p=p) + + new_weights = np.bincount(indices, minlength=len(posterior.samples)) + mask = new_weights != 0 + new_samples = posterior.samples[mask] + new_weights = posterior.weights[mask] + + return Posterior(new_samples, new_weights) + + +def posterior_percentile( + posterior: Posterior, + percentiles: Union[float, Iterable[float]]) -> np.ndarray: + + samples, weights = posterior + return wpercentile(samples, weights, percentiles, axis=0) diff --git a/utils.py b/hela/utils.py similarity index 88% rename from utils.py rename to hela/utils.py index 414cc49..2cba725 100644 --- a/utils.py +++ b/hela/utils.py @@ -1,40 +1,46 @@ -import os import logging +try: + from tqdm import tqdm +except ImportError: + def tqdm(x, *_, **__): + return x + __all__ = [ - "config_logger" + 'config_logger', + 'tqdm' ] def config_logger(log_file="/dev/null", level=logging.INFO): - + class MyFormatter(logging.Formatter): - + info_format = "\x1b[32;1m%(asctime)s [%(name)s]\x1b[0m %(message)s" error_format = "\x1b[31;1m%(asctime)s [%(name)s] [%(levelname)s]\x1b[0m %(message)s" - + def format(self, record): - + if record.levelno > logging.INFO: self._style._fmt = self.error_format else: self._style._fmt = self.info_format - + res = super(MyFormatter, self).format(record) return res - + rootLogger = logging.getLogger() - + fileHandler = logging.FileHandler(log_file) fileFormatter = logging.Formatter("%(asctime)s [%(name)s] [%(levelname)s]> %(message)s") fileHandler.setFormatter(fileFormatter) rootLogger.addHandler(fileHandler) - + consoleHandler = logging.StreamHandler() consoleFormatter = MyFormatter() consoleHandler.setFormatter(consoleFormatter) rootLogger.addHandler(consoleHandler) - + rootLogger.setLevel(level) diff --git a/hela/wpercentile.py b/hela/wpercentile.py new file mode 100644 index 0000000..8ab0d8f --- /dev/null +++ b/hela/wpercentile.py @@ -0,0 +1,78 @@ + +import numpy as np + +__all__ = [ + "wpercentile", + "wmedian" +] + + +# def _wpercentile1d(data, weights, percentiles): + +# if data.ndim > 1 or weights.ndim > 1: +# raise ValueError("data and weights must be one-dimensional arrays") + +# if data.shape != weights.shape: +# raise ValueError("data and weights must have the same shape") + +# data = np.asarray(data) +# weights = np.asarray(weights) +# percentiles = np.asarray(percentiles) + +# sort_indices = np.argsort(data) +# sorted_data = data[sort_indices] +# sorted_weights = weights[sort_indices] + +# cumsum_weights = np.cumsum(sorted_weights) +# sum_weights = cumsum_weights[-1] + +# pn = 100 * (cumsum_weights - 0.5*sorted_weights) / sum_weights + +# return np.interp(percentiles, pn, sorted_data) + + +# def wpercentile_unsafe(data, weights, percentiles, axis=None): +# """ +# Compute percentiles of a weighted sample. +# """ + +# # TODO: This code might fail with esoteric shapes of data, weights and +# # for some values of axis. Check this properly. 
+ +# if axis is None: +# data = np.ravel(data) +# weights = np.ravel(weights) +# return _wpercentile1d(data, weights, percentiles) + +# axis = np.atleast_1d(axis) + +# # Reshape the arrays for proper computation +# # Move the requested axis to the final dimensions +# dest_axis = list(range(len(axis))) +# data2 = np.moveaxis(data, axis, dest_axis) + +# ndim = len(axis) +# shape = data2.shape +# newshape = (np.prod(shape[:ndim]), np.prod(shape[ndim:])) +# newdata = np.reshape(data2, newshape) +# newweights = np.reshape(weights, newshape[0]) + +# result = np.apply_along_axis(_wpercentile1d, 0, newdata, newweights, +# percentiles) + +# final_shape = (*np.shape(percentiles), *shape[ndim:]) +# return np.reshape(result, final_shape) + + +def wpercentile(data, weights, percentiles, axis=None): + + # This is a simple, error-free but pretty inefficient implementation of + # wpercentile. + + data = np.repeat(data, weights, axis=axis) + return np.percentile(data, percentiles, axis=axis) + + +def wmedian(data, weights, axis=None): + """Compute the weighted median.""" + return wpercentile(data, weights, 50, axis) diff --git a/models.py b/models.py deleted file mode 100644 index 6dc3369..0000000 --- a/models.py +++ /dev/null @@ -1,262 +0,0 @@ - -import logging -from collections import namedtuple - -import numpy as np - -from sklearn import ensemble -from sklearn.utils import check_random_state -from sklearn.preprocessing import MinMaxScaler - -try: - from tqdm import tqdm -except ImportError: - def tqdm(x, *_, **__): - return x - -__all__ = [ - "Model", - "Posterior", - "resample_posterior" -] - -logger = logging.getLogger(__name__) - -# Posteriors are represented as a collection of weighted samples -Posterior = namedtuple("Posterior", ["samples", "weights"]) - - -def resample_posterior(posterior, num_draws): - - p = posterior.weights / posterior.weights.sum() - indices = np.random.choice(len(posterior.samples), size=num_draws, p=p) - - new_weights = np.bincount(indices, minlength=len(posterior.samples)) - mask = new_weights != 0 - new_samples = posterior.samples[mask] - new_weights = posterior.weights[mask] - - return Posterior(new_samples, new_weights) - - -class Model: - - def __init__(self, num_trees, num_jobs, - names, ranges, colors, enable_posterior=True, - verbose=1): - scaler = MinMaxScaler(feature_range=(0, 100)) - rf = ensemble.RandomForestRegressor(n_estimators=num_trees, - oob_score=True, - verbose=verbose, - n_jobs=num_jobs, - max_features="sqrt", - min_impurity_decrease=0.01) - - self.scaler = scaler - self.rf = rf - - self.num_trees = num_trees - self.num_jobs = num_jobs - self.verbose = verbose - - self.ranges = ranges - self.names = names - self.colors = colors - - # To compute the posteriors - self.enable_posterior = enable_posterior - self.data_leaves = None - self.data_weights = None - self.data_y = None - - def _scaler_fit(self, y): - if y.ndim == 1: - y = y[:, None] - - self.scaler.fit(y) - - def _scaler_transform(self, y): - if y.ndim == 1: - y = y[:, None] - return self.scaler.transform(y)[:, 0] - - return self.scaler.transform(y) - - def _scaler_inverse_transform(self, y): - - if y.ndim == 1: - y = y[:, None] - # return self.scaler.inverse_transform(y)[:, 0] - - return self.scaler.inverse_transform(y) - - def fit(self, x, y): - self._scaler_fit(y) - self.rf.fit(x, self._scaler_transform(y)) - - # Build the structures to quickly compute the posteriors - if self.enable_posterior: - data_leaves = self.rf.apply(x).T - self.data_leaves = _as_smallest_udtype(data_leaves) - 
self.data_weights = np.array( - [_tree_weights(tree, len(y)) for tree in self.rf] - ) - self.data_y = y - - def predict(self, x): - pred = self.rf.predict(x) - return self._scaler_inverse_transform(pred) - - def predict_median(self, x): - return self.predict_percentile(x, 50) - - def predict_percentile(self, x, percentile): - - if not self.enable_posterior: - raise ValueError("Cannot compute posteriors with this model. " - "Set `enable_posterior` to True to enable " - "posterior computation.") - - # Find the leaves for the query points - leaves_x = self.rf.apply(x) - - if len(x) > self.num_trees: - # If there are many queries, - # it is faster to find points using a cache - return _posterior_percentile_cache( - self.data_leaves, self.data_weights, - self.data_y, leaves_x, percentile - ) - else: - # For few queries, it is faster if we just compute the posterior - # for each element - return _posterior_percentile_nocache( - self.data_leaves, self.data_weights, - self.data_y, leaves_x, percentile - ) - - def get_params(self, deep=True): - return { - "num_trees": self.num_trees, - "num_jobs": self.num_jobs, - "names": self.names, - "ranges": self.ranges, - "colors": self.colors, - "enable_posterior": self.enable_posterior, - "verbose": self.verbose - } - - def posterior(self, x): - - if not self.enable_posterior: - raise ValueError("Cannot compute posteriors with this model. " - "Set `enable_posterior` to True to enable " - "posterior computation.") - - if x.ndim > 1: - raise ValueError("x.ndim must be 1") - - leaves_x = self.rf.apply(x[None, :])[0] - - return _posterior( - self.data_leaves, self.data_weights, - self.data_y, leaves_x - ) - - -def _posterior(data_leaves, data_weights, data_y, query_leaves): - - weights_x = (query_leaves[:, None] == data_leaves) * data_weights - weights_x = _as_smallest_udtype(weights_x.sum(0)) - - # Remove samples with weight zero - mask = weights_x != 0 - samples = data_y[mask] - weights = weights_x[mask].astype(np.uint) - - return Posterior(samples, weights) - - -def _posterior_percentile_nocache(data_leaves, data_weights, data_y, - query_leaves, percentile): - - values = [] - - logger.info("Computing percentiles...") - for leaves_x_i in tqdm(query_leaves): - posterior = _posterior( - data_leaves, data_weights, - data_y, leaves_x_i - ) - samples = np.repeat(posterior.samples, posterior.weights, axis=0) - value = np.percentile(samples, percentile, axis=0) - values.append(value) - - return np.array(values) - - -def _posterior_percentile_cache(data_leaves, data_weights, data_y, - query_leaves, percentile): - - # Build a dictionary for fast access of the contents of the leaves. 
- logger.info("Building cache...") - cache = [ - _build_leaves_cache(leaves_i, weights_i) - for leaves_i, weights_i in zip(data_leaves, data_weights) - ] - - values = [] - # Check the contents of the leaves in leaves_x - logger.info("Computing percentiles...") - for leaves_x_i in tqdm(query_leaves): - data_elements = [] - for tree, leaves_x_i_j in enumerate(leaves_x_i): - aux = cache[tree][leaves_x_i_j] - data_elements.extend(aux) - value = np.percentile(data_y[data_elements], percentile, axis=0) - values.append(value) - - return np.array(values) - - -def _build_leaves_cache(leaves, weights): - - result = {} - for index, (leaf, weight) in enumerate(zip(leaves, weights)): - if weight == 0: - continue - - if leaf not in result: - result[leaf] = [index] * weight - else: - result[leaf].extend([index] * weight) - - return result - - -def _generate_sample_indices(random_state, n_samples): - random_instance = check_random_state(random_state) - sample_indices = random_instance.randint(0, n_samples, n_samples) - - return sample_indices - - -def _tree_weights(tree, n_samples): - indices = _generate_sample_indices(tree.random_state, n_samples) - res = np.bincount(indices, minlength=n_samples) - return _as_smallest_udtype(res) - - -def _as_smallest_udtype(arr): - return arr.astype(_smallest_udtype(arr.max())) - - -def _smallest_udtype(value): - - dtypes = [np.uint8, np.uint16, np.uint32, np.uint64] - - for dtype in dtypes: - if value <= np.iinfo(dtype).max: - return dtype - - raise ValueError("value is too large for any dtype") diff --git a/rfretrieval.py b/rfretrieval.py index 662265f..bde664a 100644 --- a/rfretrieval.py +++ b/rfretrieval.py @@ -7,122 +7,204 @@ from sklearn import metrics, multioutput import joblib -from dataset import load_dataset, load_data_file -from models import Model -from utils import config_logger -from wpercentile import wpercentile -import plot +import hela -logger = logging.getLogger(__name__) +LOGGER = logging.getLogger(__name__) -def train_model(dataset, num_trees, num_jobs, verbose=1): - pipeline = Model(num_trees, num_jobs, - names=dataset.names, - ranges=dataset.ranges, - colors=dataset.colors, - verbose=verbose) +def train_model( + dataset, + num_trees, + min_impurity_decrease, + num_jobs, + verbose=1): + + pipeline = hela.Model( + num_trees=num_trees, + min_impurity_decrease=min_impurity_decrease, + num_jobs=num_jobs, + verbose=verbose + ) pipeline.fit(dataset.training_x, dataset.training_y) return pipeline def test_model(model, dataset, output_path): - + if dataset.testing_x is None: return - - logger.info("Testing model...") + + LOGGER.info("Testing model...") pred = model.predict(dataset.testing_x) # pred = model.predict_median(dataset.testing_x) - r2scores = {name_i: metrics.r2_score(real_i, pred_i) - for name_i, real_i, pred_i in zip(dataset.names, dataset.testing_y.T, pred.T)} + r2scores = { + name_i: metrics.r2_score(real_i, pred_i) + for name_i, real_i, pred_i in zip(dataset.names, + dataset.testing_y.T, + pred.T) + } print("Testing scores:") for name, values in r2scores.items(): print("\tR^2 score for {}: {:.3f}".format(name, values)) - - logger.info("Plotting testing results...") - fig = plot.predicted_vs_real(dataset.testing_y, pred, dataset.names, dataset.ranges) + + LOGGER.info("Plotting testing results...") + fig = hela.predicted_vs_real( + dataset.testing_y, + pred, + names=dataset.names, + ranges=dataset.ranges + ) fig.savefig(os.path.join(output_path, "predicted_vs_real.pdf"), bbox_inches='tight') -def compute_feature_importance(model, dataset, 
output_path): - - logger.info("Computing feature importance for individual parameters...") +def _plot_feature_importances(model, dataset, output_path): + + LOGGER.info("Computing feature importances for individual parameters...") regr = multioutput.MultiOutputRegressor(model, n_jobs=1) regr.fit(dataset.training_x, dataset.training_y) - - fig = plot.feature_importances(forests=[i.rf for i in regr.estimators_] + [model.rf], - names=dataset.names + ["joint prediction"], - colors=dataset.colors + ["C0"]) - + + fig = hela.plot.feature_importances( + forests=( + [i.random_forest for i in regr.estimators_] + + [model.random_forest] + ), + names=dataset.names + ["joint prediction"], + colors=dataset.colors + ["C0"] + ) + fig.savefig(os.path.join(output_path, "feature_importances.pdf"), bbox_inches='tight') +def _plot_feature_importances_breakdown(model, dataset, output_path): + + LOGGER.info("Computing feature importances per output...") + importances = hela.importances_per_output( + model.random_forest, + dataset.training_x, + model.scaler_transform(dataset.training_y) + ) + + fig = hela.plot.stacked_feature_importances( + importances, + dataset.names, + dataset.colors + ) + + fig.tight_layout() + fig.savefig( + os.path.join(output_path, "feature_importances_breakdown.pdf"), + bbox_inches='tight' + ) + + +def _compute_best_fit(model, training_x, query, out_filename): + + posterior_x = model.posterior(query, prior_samples=training_x) + best_fit = hela.posterior_percentile(posterior_x, [50, 16, 84]) + np.save(out_filename, best_fit) + + def data_ranges(posterior, percentiles=(50, 16, 84)): - - samples, weights = posterior - values = wpercentile(samples, weights, percentiles, axis=0) + + values = hela.posterior_percentile(posterior, percentiles) ranges = np.array([values[0], values[2]-values[0], values[0]-values[1]]) return ranges.T -def main_train(training_dataset, model_path, - num_trees, num_jobs, - feature_importance, quiet, - **_): - - logger.info("Loading dataset '{}'...".format(training_dataset)) - dataset = load_dataset(training_dataset) - - logger.info("Training model...") - model = train_model(dataset, num_trees, num_jobs, not quiet) - +def main_train( + training_dataset, + model_path, + num_trees, + min_impurity_decrease, + num_jobs, + feature_importances, + feature_importances_breakdown, + quiet, + **_): + + LOGGER.info("Loading dataset '%s'...", training_dataset) + dataset = hela.load_dataset(training_dataset) + + LOGGER.info("Training model...") + model = train_model( + dataset=dataset, + num_trees=num_trees, + min_impurity_decrease=min_impurity_decrease, + num_jobs=num_jobs, + verbose=not quiet) + os.makedirs(model_path, exist_ok=True) model_file = os.path.join(model_path, "model.pkl") - logger.info("Saving model to '{}'...".format(model_file)) + LOGGER.info("Saving model to '%s'...", model_file) joblib.dump(model, model_file) - - logger.info("Printing model information...") - print("OOB score: {:.4f}".format(model.rf.oob_score_)) - + + LOGGER.info("Printing model information...") + print("OOB score: {:.4f}".format(model.random_forest.oob_score_)) + test_model(model, dataset, model_path) - - if feature_importance: + + if feature_importances: model.enable_posterior = False - compute_feature_importance(model, dataset, model_path) + _plot_feature_importances(model, dataset, model_path) + + if feature_importances_breakdown: + _plot_feature_importances_breakdown(model, dataset, model_path) + +def main_predict( + training_dataset, + model_path, + data_file, + output_path, + plot_posterior, 
+ best_fit_model, + **_): + + LOGGER.info("Loading dataset '%s'...", training_dataset) + dataset = hela.load_dataset(training_dataset, load_testing_data=False) -def main_predict(model_path, data_file, output_path, plot_posterior, **_): - model_file = os.path.join(model_path, "model.pkl") - logger.info("Loading random forest from '{}'...".format(model_file)) - model = joblib.load(model_file) - - logger.info("Loading data from '{}'...".format(data_file)) - data, _ = load_data_file(data_file, model.rf.n_features_) - + LOGGER.info("Loading random forest from '%s'...", model_file) + model: hela.Model = joblib.load(model_file) + + LOGGER.info("Loading data from '%s'...", data_file) + data, _ = hela.load_data_file(data_file, model.random_forest.n_features_) + + os.makedirs(output_path, exist_ok=True) + posterior = model.posterior(data[0]) - + posterior_ranges = data_ranges(posterior) - for name_i, pred_range_i in zip(model.names, posterior_ranges): - print("Prediction for {}: {:.3g} [+{:.3g} -{:.3g}]".format(name_i, *pred_range_i)) - + for name_i, pred_range_i in zip(dataset.names, posterior_ranges): + format_str = "Prediction for {}: {:.3g} [+{:.3g} -{:.3g}]" + print(format_str.format(name_i, *pred_range_i)) + if plot_posterior: - logger.info("Plotting the posterior matrix...") - - fig = plot.posterior_matrix( + LOGGER.info("Plotting the posterior matrix...") + + fig = hela.plot.posterior_matrix( posterior, - names=model.names, - ranges=model.ranges, - colors=model.colors + names=dataset.names, + ranges=dataset.ranges, + colors=dataset.colors ) os.makedirs(output_path, exist_ok=True) - logger.info("Saving the figure....") + LOGGER.info("Saving the figure....") fig.savefig(os.path.join(output_path, "posterior_matrix.pdf"), bbox_inches='tight') - logger.info("Done.") + LOGGER.info("Done.") + + if best_fit_model: + LOGGER.info("Computing and saving best fit...") + _compute_best_fit( + model, + dataset.training_x, + data[0], + os.path.join(output_path, "best_fit.npy") + ) + LOGGER.info("Done.") def show_usage(parser, **_): @@ -130,38 +212,77 @@ def show_usage(parser, **_): def main(): - - parser = argparse.ArgumentParser(description="rfretrieval: Atmospheric retrieval with random forests.") + + parser = argparse.ArgumentParser( + description="rfretrieval: Atmospheric retrieval with random forests." 
+ ) parser.set_defaults(func=show_usage, parser=parser) parser.add_argument("--quiet", action='store_true') subparsers = parser.add_subparsers() - + parser_train = subparsers.add_parser('train', help="train a model") - parser_train.add_argument("training_dataset", type=str, - help="JSON file with the training dataset description") - parser_train.add_argument("model_path", type=str, - help="path where the trained model will be saved") - parser_train.add_argument("--num-trees", type=int, default=1000, - help="number of trees in the forest") - parser_train.add_argument("--num-jobs", type=int, default=5, - help="number of parallel jobs for fitting the random forest") - parser_train.add_argument("--feature-importance", action='store_true', - help="compute feature importances after training") parser_train.set_defaults(func=main_train) - - parser_test = subparsers.add_parser('predict', help="use a trained model to perform a prediction") + parser_train.add_argument( + "training_dataset", type=str, + help="JSON file with the training dataset description" + ) + parser_train.add_argument( + "model_path", type=str, + help="path where the trained model will be saved" + ) + parser_train.add_argument( + "--num-trees", type=int, default=1000, + help="number of trees in the forest" + ) + parser_train.add_argument( + "--min-impurity-decrease", type=float, default=0.01, + help="minimum impurity decrease to split a node in each tree" + ) + parser_train.add_argument( + "--num-jobs", type=int, default=5, + help="number of parallel jobs for fitting the random forest" + ) + parser_train.add_argument( + "--feature-importances", action='store_true', + help="plot feature importances after training" + ) + parser_train.add_argument( + "--feature-importances-breakdown", action='store_true', + help="plot feature importances per output after training" + ) + + parser_test = subparsers.add_parser( + 'predict', + help="use a trained model to perform a prediction" + ) parser_test.set_defaults(func=main_predict) - parser_test.add_argument("model_path", type=str, - help="path to the trained model") - parser_test.add_argument("data_file", type=str, - help="NPY file with the data for the prediction") - parser_test.add_argument("output_path", type=str, - help="path to write the results of the prediction") - parser_test.add_argument("--plot-posterior", action='store_true', - help="plot and save the scatter matrix of the posterior distribution") - + parser_test.add_argument( + "training_dataset", type=str, + help="JSON file with the training dataset description" + ) + parser_test.add_argument( + "model_path", type=str, + help="path to the trained model" + ) + parser_test.add_argument( + "data_file", type=str, + help="NPY file with the data for the prediction" + ) + parser_test.add_argument( + "output_path", type=str, + help="path to write the results of the prediction" + ) + parser_test.add_argument( + "--plot-posterior", action='store_true', + help="plot and save the scatter matrix of the posterior distribution" + ) + parser_test.add_argument( + "--best-fit-model", action='store_true', + help="save the best fit model" + ) + args = parser.parse_args() - config_logger(level=logging.WARNING if args.quiet else logging.INFO) + hela.config_logger(level=logging.WARNING if args.quiet else logging.INFO) args.func(**vars(args)) diff --git a/wpercentile.py b/wpercentile.py deleted file mode 100644 index f030db9..0000000 --- a/wpercentile.py +++ /dev/null @@ -1,64 +0,0 @@ - -import numpy as np - -__all__ = [ - "wpercentile", - "wmedian" -] - 
- -def _wpercentile1d(data, weights, percentiles): - - if data.ndim > 1 or weights.ndim > 1: - raise ValueError("data and weights must be one-dimensional arrays") - - if data.shape != weights.shape: - raise ValueError("data and weights must have the same shape") - - data = np.asarray(data) - weights = np.asarray(weights) - percentiles = np.asarray(percentiles) - - sort_indices = np.argsort(data) - sorted_data = data[sort_indices] - sorted_weights = weights[sort_indices] - - cumsum_weights = np.cumsum(sorted_weights) - sum_weights = cumsum_weights[-1] - - pn = 100 * (cumsum_weights - 0.5*sorted_weights) / sum_weights - - return np.interp(percentiles, pn, sorted_data) - - -def wpercentile(data, weights, percentiles, axis=None): - """ - Compute percentiles of a weighted sample. - """ - if axis is None: - data = np.ravel(data) - weights = np.ravel(weights) - return _wpercentile1d(data, weights, percentiles) - - axis = np.atleast_1d(axis) - - # Reshape the arrays for proper computation - # Move the requested axis to the final dimensions - dest_axis = list(range(len(axis))) - data2 = np.moveaxis(data, axis, dest_axis) - - ndim = len(axis) - shape = data2.shape - newshape = (np.prod(shape[:ndim]), np.prod(shape[ndim:])) - newdata = np.reshape(data2, newshape) - newweights = np.reshape(weights, newshape[0]) - - result = np.apply_along_axis(_wpercentile1d, 0, newdata, newweights, percentiles) - - final_shape = (*np.shape(percentiles), *shape[ndim:]) - return np.reshape(result, final_shape) - - -def wmedian(data, weights, axis=None): - """Compute the weighted median.""" - return wpercentile(data, weights, 50, axis)
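
For reference, a minimal end-to-end sketch of the refactored `hela` API as it is exercised by `rfretrieval.py`. This is illustrative, not part of the patch: the dataset path is a placeholder, and the keyword values simply mirror the defaults exposed by the `train` subcommand.

```python
import hela

# Load a dataset description (placeholder path; the JSON layout is the one
# read by hela.load_dataset in hela/dataset.py).
dataset = hela.load_dataset("example_dataset/dataset.json")

# Train the model; keywords mirror the rfretrieval.py CLI defaults
# (1000 trees, --min-impurity-decrease 0.01, 5 parallel jobs).
model = hela.Model(n_estimators=1000, min_impurity_decrease=0.01, n_jobs=5)
model.fit(dataset.training_x, dataset.training_y)

# Posterior of the parameters for a single spectrum (here simply the first
# training sample; Model.posterior expects a 1-D array).
posterior = model.posterior(dataset.training_x[0])

# Median and 16th/84th percentiles of each parameter, weighted by the
# posterior sample weights.
median, lower, upper = hela.posterior_percentile(posterior, [50, 16, 84])

# Per-output feature importances and the corresponding stacked bar plot.
importances = hela.importances_per_output(
    model.random_forest,
    dataset.training_x,
    model.scaler_transform(dataset.training_y),
)
fig = hela.plot.stacked_feature_importances(
    importances, dataset.names, dataset.colors
)
fig.savefig("feature_importances_breakdown.pdf", bbox_inches="tight")
```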
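A short aside on the rewritten `wpercentile`: the interpolation-based `_wpercentile1d` was replaced by a repeat-and-percentile approach, which in practice assumes non-negative integer weights (exactly what the bootstrap counts from `_tree_weights` and the posterior weights provide). A self-contained sketch with made-up numbers shows what the new implementation computes:

```python
import numpy as np


def wpercentile(data, weights, percentiles, axis=None):
    # Same idea as hela/wpercentile.py: repeat every sample according to its
    # integer weight, then defer to np.percentile on the expanded array.
    data = np.repeat(data, weights, axis=axis)
    return np.percentile(data, percentiles, axis=axis)


# Made-up weighted sample: the value 1.0 carries weight 3 and the value 2.0
# carries weight 1, so the expanded array is [1.0, 1.0, 1.0, 2.0].
data = np.array([1.0, 2.0])
weights = np.array([3, 1])

print(wpercentile(data, weights, 50))            # 1.0
print(wpercentile(data, weights, [16, 50, 84]))  # [1.   1.   1.52]
```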