diff --git a/proteinshake/frontend/dataloader.py b/proteinshake/backend/adapters/__baseclass__.py
similarity index 100%
rename from proteinshake/frontend/dataloader.py
rename to proteinshake/backend/adapters/__baseclass__.py
diff --git a/proteinshake/backend/collection.py b/proteinshake/backend/collection.py
index e69de29b..42d4d2e3 100644
--- a/proteinshake/backend/collection.py
+++ b/proteinshake/backend/collection.py
@@ -0,0 +1,30 @@
+from typing import Any, Optional
+
+
+class Collection:
+    """
+    Holds a set of proteins as the result of a database query and prepares it for dataset creation.
+
+    NOTE: skeleton only -- every method body is still a placeholder.
+    """
+
+    def __init__(self, proteins: list[dict]) -> None:
+        pass
+
+    def add(self, metadata: Any) -> None:
+        """
+        Adds any kind of metadata to the collection, such as split indices.
+        """
+        pass
+
+    def save(self, name: str) -> None:
+        """
+        Saves the proteins and metadata in compressed format.
+        """
+        pass
+
+    def upload(self, version: Optional[str] = None) -> None:
+        """
+        Uploads the collection and metadata to Zenodo. `version` defaults to the current date.
+        """
+        pass
diff --git a/proteinshake/backend/database.py b/proteinshake/backend/database.py
index e69de29b..0021b8e9 100644
--- a/proteinshake/backend/database.py
+++ b/proteinshake/backend/database.py
@@ -0,0 +1,13 @@
+from pathlib import Path
+from .collection import Collection
+
+
+class Database:
+    def __init__(self, storage: Path) -> None:
+        """Placeholder constructor: `storage` is accepted but not used yet."""
+
+    def update(self) -> None:
+        """Placeholder: does nothing yet."""
+
+    def query(self, query: str) -> Collection:
+        """Placeholder: returns None for now, despite the `Collection` annotation."""
diff --git a/proteinshake/backend/protein.py b/proteinshake/backend/protein.py
index e69de29b..bc150cb6 100644
--- a/proteinshake/backend/protein.py
+++ b/proteinshake/backend/protein.py
@@ -0,0 +1,4 @@
+class Protein:
+    """
+    Takes a result row from a database query and converts it to a dictionary (placeholder: no logic is implemented yet).
+    """
diff --git a/proteinshake/backend/structure.py b/proteinshake/backend/structure.py
new file mode 100644
index 00000000..19aee811
--- /dev/null
+++ b/proteinshake/backend/structure.py
@@ -0,0 +1,4 @@
+class Structure:
+    """
+    Takes a pdb/mmcif file and converts it to a compressed data format (placeholder: no logic is implemented yet).
+    """
diff --git a/proteinshake/tasks/README.md b/proteinshake/frontend/README.md
similarity index 98%
rename from proteinshake/tasks/README.md
rename to proteinshake/frontend/README.md
index 34e66bad..ae992b8c 100644
--- a/proteinshake/tasks/README.md
+++ b/proteinshake/frontend/README.md
@@ -1,3 +1,7 @@
+## Dataset API
+
+tba
+
 ## Task API
 
 A task brings three objects: `Splitter`, `Target`, `Evaluator` to a given `proteinshake.Dataset` instance.
diff --git a/proteinshake/frontend/dataset.py b/proteinshake/frontend/dataset.py
new file mode 100644
index 00000000..81ba5f14
--- /dev/null
+++ b/proteinshake/frontend/dataset.py
@@ -0,0 +1,73 @@
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any, Optional, Union
+
+
+class Dataset:
+    """
+    Takes a compressed collection and exposes it through representation and framework transforms.
+
+    NOTE(review): skeleton only. `self.proteins` (used by `to_graph`) is never assigned in this
+    class, and the transform types named in the annotations are not imported here yet; the
+    `__future__` import keeps those annotations unevaluated so the module can still be imported.
+    """
+
+    def __init__(
+        self,
+        path: Path,
+        version: str = "latest",
+        shard_size: Optional[int] = None,
+        batch_size: Optional[int] = None,
+        shuffle: bool = False,
+        random_seed: int = 42,
+    ) -> None:
+        """
+        Takes a compressed collection and applies transforms.
+        `path` is either pointing to a Zenodo repository or a directory in the local filesystem.
+        """
+        pass
+
+    def to_graph(
+        self,
+        pre_transform: Optional[PreRepresentationTransform] = None,
+        post_transform: Optional[PostRepresentationTransform] = None,
+        **kwargs
+    ) -> Dataset:
+        """
+        Applies pre/representation/post transforms to all proteins in the dataset.
+        A transform left as None is skipped; `kwargs` are forwarded to `GraphTransform`.
+        Returns the dataset itself so that calls can be chained.
+        """
+        if pre_transform is not None:
+            self.proteins.apply(pre_transform)
+        # TODO(review): `GraphTransform` still has to be imported into this module.
+        self.proteins.apply(GraphTransform(**kwargs))
+        if post_transform is not None:
+            self.proteins.apply(post_transform)
+        return self
+
+    def pyg(
+        self,
+        pre_transform: Optional[PreFrameworkTransform] = None,
+        post_transform: Optional[PostFrameworkTransform] = None,
+        **kwargs
+    ) -> Any:
+        """
+        Creates an iterable that wraps around __next__ or __getitem__ and applies pre/framework/post transforms.
+        Returns a framework-specific dataset instance (iterable-style if sharded, map-style if in-memory or on-disk).
+        """
+        pass
+
+    def __next__(self) -> None:
+        """
+        Yields the next protein from a shard. When the shard is finished, loads the next one.
+        If `shuffle` is True, loads a random shard and applies shuffling within the shard.
+        """
+        pass
+
+    def __getitem__(self, index: Union[int, list, tuple, ndarray]) -> None:
+        """
+        Returns the indexed proteins. Not available with sharding for performance reasons.
+        """
+        pass
diff --git a/proteinshake/frontend/datasets/dataset.py b/proteinshake/frontend/datasets/dataset.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/proteinshake/frontend/evaluators/classification.py b/proteinshake/frontend/evaluators/classification.py
index e69de29b..e6bcb918 100644
--- a/proteinshake/frontend/evaluators/classification.py
+++ b/proteinshake/frontend/evaluators/classification.py
@@ -0,0 +1,11 @@
+from sklearn.metrics import accuracy_score
+
+from .evaluator import Evaluator
+
+
+class ClassificationEvaluator(Evaluator):
+    """Evaluator for classification tasks that reports prediction accuracy."""
+
+    def __call__(self, pred: list, truth: list) -> dict:
+        """Return `{'accuracy': ...}`, the fraction of entries in `pred` that equal `truth`."""
+        return {'accuracy': accuracy_score(truth, pred)}
diff --git a/proteinshake/tasks/evaluator.py b/proteinshake/frontend/evaluators/evaluator.py
similarity index 100%
rename from proteinshake/tasks/evaluator.py
rename to proteinshake/frontend/evaluators/evaluator.py
diff --git a/proteinshake/frontend/protein.py b/proteinshake/frontend/protein.py
new file mode 100644
index 00000000..86fc1a52
--- /dev/null
+++ b/proteinshake/frontend/protein.py
@@ -0,0 +1,4 @@
+class Protein:
+    """
+    Takes a (compressed) collection protein and converts it to an uncompressed protein dictionary (placeholder: no logic is implemented yet).
+    """
diff --git a/proteinshake/frontend/splitters/attribute.py b/proteinshake/frontend/splitters/attribute.py
new file mode 100644
index 00000000..7e729b58
--- /dev/null
+++ b/proteinshake/frontend/splitters/attribute.py
@@ -0,0 +1,19 @@
+from .splitter import Splitter
+
+
+class AttributeSplitter(Splitter):
+    """
+    Compute splits based on an attribute that already exists in the dataset.
+    """
+
+    def __init__(
+        self, train_attribute: str, val_attribute: str, test_attribute: str
+    ) -> None:
+        """Store the three attribute names; presumably each one marks a split (TODO confirm)."""
+        self.train_attribute = train_attribute
+        self.val_attribute = val_attribute
+        self.test_attribute = test_attribute
+
+    def __call__(self, dataset) -> tuple[list, list, list]:
+        """Placeholder: not implemented yet, so it returns None instead of the index triple."""
+        pass
diff --git a/proteinshake/frontend/splitters/from_existing.py b/proteinshake/frontend/splitters/from_existing.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/proteinshake/frontend/splitters/pairwise_attribute.py b/proteinshake/frontend/splitters/pairwise_attribute.py
new file mode 100644
index 00000000..f90b9258
--- /dev/null
+++ b/proteinshake/frontend/splitters/pairwise_attribute.py
@@ -0,0 +1,23 @@
+from .attribute import AttributeSplitter
+from .splitter import Splitter
+
+
+class PairwiseAttributeSplitter(Splitter):
+    """Compute pairwise splits based on an attribute that already exists in the dataset.
+    Takes all pairs of train/val/test in the single attribute splitting setting."""
+
+    def __init__(
+        self, train_attribute: str, val_attribute: str, test_attribute: str
+    ) -> None:
+        """Store the three attribute names that are handed to `AttributeSplitter` in `__call__`."""
+        self.train_attribute = train_attribute
+        self.val_attribute = val_attribute
+        self.test_attribute = test_attribute
+
+    def __call__(self, dataset) -> tuple[list, list, list]:
+        """Placeholder: builds the non-paired splitter but does not compute the pairs yet (returns None)."""
+        tmp_splitter = AttributeSplitter(
+            self.train_attribute, self.val_attribute, self.test_attribute
+        )
+        # compute pairs of indices on the non-paired splits
+        pass
diff --git a/proteinshake/frontend/splitters/sequence_similarity.py b/proteinshake/frontend/splitters/sequence_similarity.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/proteinshake/tasks/splitter.py b/proteinshake/frontend/splitters/splitter.py
similarity index 59%
rename from proteinshake/tasks/splitter.py
rename to proteinshake/frontend/splitters/splitter.py
index 93988c4a..9560e7a8 100644
--- a/proteinshake/tasks/splitter.py
+++ b/proteinshake/frontend/splitters/splitter.py
@@ -1,5 +1,7 @@
 class Splitter:
-""" Abstract class for selecting train/val/test indices given a dataset.
-"""
+    """
+    Abstract class for selecting train/val/test indices given a dataset; subclasses override `__call__`.
+    """
+
     def __call__(self, dataset) -> tuple[list, list, list]:
         raise NotImplementedError
diff --git a/proteinshake/frontend/splitters/structure_similarity.py b/proteinshake/frontend/splitters/structure_similarity.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/proteinshake/tasks/pairwise_property_target.py b/proteinshake/frontend/targets/pairwise_property_target.py
similarity index 100%
rename from proteinshake/tasks/pairwise_property_target.py
rename to proteinshake/frontend/targets/pairwise_property_target.py
diff --git a/proteinshake/tasks/property_target.py b/proteinshake/frontend/targets/property_target.py
similarity index 100%
rename from proteinshake/tasks/property_target.py
rename to proteinshake/frontend/targets/property_target.py
diff --git a/proteinshake/tasks/target.py b/proteinshake/frontend/targets/target.py
similarity index 100%
rename from proteinshake/tasks/target.py
rename to proteinshake/frontend/targets/target.py
diff --git a/proteinshake/frontend/task.py b/proteinshake/frontend/task.py
new file mode 100644
index 00000000..326248f7
--- /dev/null
+++ b/proteinshake/frontend/task.py
@@ -0,0 +1,39 @@
+from __future__ import annotations
+
+from typing import Optional
+
+
+class Task:
+    """
+    Abstract class for Tasks. A task contains the logic for splitting, target generation, and evaluation.
+    Optionally, we can consider the Task as a way of syncing with a paperswithcode instance https://github.com/paperswithcode/paperswithcode-client.
+    """
+
+    def __init__(
+        self,
+        dataset: proteinshake.Dataset,
+        splitter: proteinshake.Splitter,
+        target: proteinshake.Target,
+        evaluator: proteinshake.Evaluator,
+        task_id: Optional[int],
+    ) -> None:
+        """
+        Stores the task components and computes the train/val/test indices.
+        `splitter` is called once on `dataset` and must return a (train, val, test) triple of index lists.
+        `task_id` may be None, in which case `leaderboard_fetch` returns None.
+        """
+        self.dataset = dataset
+        # The Splitter interface is `__call__(dataset)`; it has no `train_idx()`-style accessors.
+        self.train_idx, self.val_idx, self.test_idx = splitter(dataset)
+
+        self.task_id = task_id
+
+        self.target = target
+        self.evaluator = evaluator
+
+    def leaderboard_fetch(self):
+        """Load current leaderboard results for this task, or return None if no `task_id` is set."""
+        if self.task_id is None:
+            return None
+        # TODO(review): `get_leaderboard` is not defined or imported in this module yet.
+        return get_leaderboard(f"https://paperswithcode.com/sota/{self.task_id}")
diff --git a/proteinshake/frontend/tasks/pairwise_protein.py b/proteinshake/frontend/tasks/pairwise_protein.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/proteinshake/frontend/tasks/pairwise_residue.py b/proteinshake/frontend/tasks/pairwise_residue.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/proteinshake/frontend/tasks/protein.py b/proteinshake/frontend/tasks/protein.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/proteinshake/frontend/tasks/residue.py b/proteinshake/frontend/tasks/residue.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/proteinshake/frontend/tasks/task.py b/proteinshake/frontend/tasks/task.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/proteinshake/transforms/framework/torch.py b/proteinshake/frontend/transforms/framework/torch.py
similarity index 100%
rename from proteinshake/transforms/framework/torch.py
rename to proteinshake/frontend/transforms/framework/torch.py
diff --git a/proteinshake/transforms/post_framework/note.md b/proteinshake/frontend/transforms/post_framework/note.md
similarity index 100%
rename from proteinshake/transforms/post_framework/note.md
rename to proteinshake/frontend/transforms/post_framework/note.md
diff --git a/proteinshake/transforms/post_representation/add_node_degree.py b/proteinshake/frontend/transforms/post_representation/add_node_degree.py
similarity index 100%
rename from proteinshake/transforms/post_representation/add_node_degree.py
rename to proteinshake/frontend/transforms/post_representation/add_node_degree.py
diff --git a/proteinshake/transforms/pre_framework/random_residue_masking.py b/proteinshake/frontend/transforms/pre_framework/random_residue_masking.py
similarity index 100%
rename from proteinshake/transforms/pre_framework/random_residue_masking.py
rename to proteinshake/frontend/transforms/pre_framework/random_residue_masking.py
diff --git a/proteinshake/transforms/pre_representation/residue_level.py b/proteinshake/frontend/transforms/pre_representation/residue_level.py
similarity index 100%
rename from proteinshake/transforms/pre_representation/residue_level.py
rename to proteinshake/frontend/transforms/pre_representation/residue_level.py
diff --git a/proteinshake/transforms/representation/point.py b/proteinshake/frontend/transforms/representation/point.py
similarity index 100%
rename from proteinshake/transforms/representation/point.py
rename to proteinshake/frontend/transforms/representation/point.py
diff --git a/proteinshake/tasks/attribute_splitter.py b/proteinshake/tasks/attribute_splitter.py
deleted file mode 100644
index aa8e3f9d..00000000
--- a/proteinshake/tasks/attribute_splitter.py
+++ /dev/null
@@ -1,16 +0,0 @@
-from .splitter import Splitter
-
-class AttributeSplitter(Splitter):
-    """ Compute splits based on an attribute that already exists in the dataset"""
-    def __init__(self,
-                 train_attribute: str,
-                 val_attribute: str,
-                 test_attribute: str) -> None:
-
-        self.train_attribute = train_attribute 
-        self.val_attribute = val_attribute 
-        self.test_attribute = test_attribute 
-
-    def __call__(self, dataset) -> tuple[list, list, list]:
-        pass
-    pass
diff --git a/proteinshake/tasks/classification_evaluator.py b/proteinshake/tasks/classification_evaluator.py
deleted file mode 100644
index e6bcb918..00000000
--- a/proteinshake/tasks/classification_evaluator.py
+++ /dev/null
@@ -1,4 +0,0 @@
-class ClassificationEvaluator(Evaluator):
-    def __call__(self, pred : list, truth: list):
-        return {'accuracy': sklearn.accuracy(pred, truth)}
-        pass
diff --git a/proteinshake/tasks/pairwise_attribute_splitter.py b/proteinshake/tasks/pairwise_attribute_splitter.py
deleted file mode 100644
index 86762c65..00000000
--- a/proteinshake/tasks/pairwise_attribute_splitter.py
+++ /dev/null
@@ -1,21 +0,0 @@
-from .splitter import Splitter
-
-class PairwiseAttributeSplitter(Splitter):
-    """ Compute pairwise splits based on an attribute that already exists in the dataset.
-    Takes all pairs of train/val/test in the single attribute splitting setting."""
-    def __init__(self,
-                 train_attribute: str,
-                 val_attribute: str,
-                 test_attribute: str) -> None:
-
-        self.train_attribute = train_attribute 
-        self.val_attribute = val_attribute 
-        self.test_attribute = test_attribute 
-
-    def __call__(self, dataset) -> tuple[list, list, list]:
-        tmp_splitter = AttributeSplitter(self.train_attribute,
-                                         self.val_attribute,
-                                         self.test_attribute)
-        # compute pairs of indices on the non-paired splits
-        pass
-    pass
diff --git a/proteinshake/tasks/task.py b/proteinshake/tasks/task.py
deleted file mode 100644
index 11456ad6..00000000
--- a/proteinshake/tasks/task.py
+++ /dev/null
@@ -1,29 +0,0 @@
-"""
-Abstract class for Tasks. A task contains the logic for splitting, target generation, and evaluation.
-Optionally, we can consider the Task as a way of syncing with a paperwithcode instance https://github.com/paperswithcode/paperswithcode-client.
-"""
-
-class Task:
-    def __init__(self, dataset: proteinshake.Dataset,
-                       splitter : proteinshake.Splitter,
-                       target: proteinshake.Target,
-                       evaluator: proteinshake.Evaluator,
-                       task_id: int,
-                 ) -> None:
-        self.dataset = dataset
-        self.train_idx = splitter.train_idx()
-        self.val_idx = splitter.val_idx()
-        self.test_idx = splitter.test_idx()
-
-        self.task_id = task_id
-
-        self.target = target
-        self.evaluator = evaluator
-        pass
-
-    def leaderboard_fetch(self):
-        """ Load current leaderboard results for this task
-        """
-
-        if not self.task_id is None:
-            return get_leaderboard(f"https:/paperswithcode.com/sota/{self.task_id}")
diff --git a/proteinshake/tasks/time_splitter.py b/proteinshake/tasks/time_splitter.py
deleted file mode 100644
index 3450cefc..00000000
--- a/proteinshake/tasks/time_splitter.py
+++ /dev/null
@@ -1,15 +0,0 @@
-from .splitter import Splitter
-
-class TimeSplitter(Splitter):
-    """ Compute splits based on an structure publication date which should be an attribute
-    in the Dataset"""
-    def __init__(self,
-                 train_cutoff: int,
-                 val_cutoff: int) -> None:
-
-        self.train_cutoff = train_cutoff
-        self.val_cutoff = val_cutoff 
-
-    def __call__(self, dataset) -> tuple[list, list, list]:
-        pass
-    pass