diff --git a/.bumpversion.cfg b/.bumpversion.cfg
new file mode 100644
index 00000000..87d247bd
--- /dev/null
+++ b/.bumpversion.cfg
@@ -0,0 +1,6 @@
+[bumpversion]
+current_version = 0.1
+commit = True
+tag = True
+
+[bumpversion:file:setup.py]
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 00000000..fa60c52e
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,6 @@
+# Include the README
+include *.md
+
+# Include the license file
+include LICENSE
+
diff --git a/README.md b/README.md
index 118757cf..1251d039 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,9 @@
 # hepdata_lib
 Library for getting your data into HEPData
+
+## External dependencies
+
+- [ROOT](https://root.cern.ch)
+- [ImageMagick](https://www.imagemagick.org)
+
+Make sure that you have `ROOT` in your `$PYTHONPATH` and that the `convert` command is available by adding its location to your `$PATH` if needed.
diff --git a/hepdata_lib/__init__.py b/hepdata_lib/__init__.py
new file mode 100644
index 00000000..c6b4e566
--- /dev/null
+++ b/hepdata_lib/__init__.py
@@ -0,0 +1,504 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+import os
+import fnmatch
+import yaml
+import ROOT as r
+from collections import defaultdict
+import math
+import numpy as np
+import subprocess
+
+
+# Register defalut dict so that yaml knows it is a dictionary type
+from yaml.representer import Representer
+yaml.add_representer(defaultdict, Representer.represent_dict)
+
+
+def execute_command(command):
+    """execute shell command using subprocess..."""
+    proc = subprocess.Popen(command, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
+                            stderr=subprocess.PIPE, shell=True, universal_newlines=True)
+    result = ""
+    exit_code = proc.wait()
+    if exit_code != 0:
+        for line in proc.stderr:
+            result = result + line
+        raise RuntimeError(result)
+
+
+def find_all_matching(path, pattern):
+    """Utility function that works like 'find' in bash."""
+    if not os.path.exists(path):
+        raise RuntimeError("Invalid path '{0}'".format(path))
+    result = []
+    for root, dirs, files in os.walk(path):
+        for thisfile in files:
+            if fnmatch.fnmatch(thisfile, pattern):
+                result.append(os.path.join(root, thisfile))
+    return result
+
+
+def relative_round(value, relative_digits):
+    """Rounds to a given relative precision"""
+    if(value == 0):
+        return 0
+    if(type(value) == str) or np.isnan(value):
+        return value
+
+    value_precision = math.ceil(math.log10(abs(value)))
+
+    absolute_digits = - value_precision + relative_digits
+    if(absolute_digits < 0):
+        absolute_digits = 0
+
+    return round(value, int(absolute_digits))
+
+
+class Variable(object):
+    """A Variable is a wrapper for a list of values + some meta data."""
+    # pylint: disable=too-many-instance-attributes
+    # Eight is reasonable in this case.
+
+    def __init__(self, name, is_independent=True, is_binned=True, units=""):
+        self.name = name
+        self.is_independent = is_independent
+        self.is_binned = is_binned
+        self.qualifiers = []
+
+        self.units = units
+        self.values = []
+        self.uncertainties = []
+        self.digits = 5
+
+    def set_values(self, values):
+        if(self.is_binned):
+            self._values = [(float(x[0]), float(x[1])) for x in values]
+        else:
+            self._values = [x if type(x) ==
+                            str else float(x) for x in values]
+
+    def get_values(self):
+        return self._values
+
+    values = property(get_values, set_values)
+
+    def scale_values(self, factor):
+        """Multiply each value by constant factor. Also applies to uncertainties."""
+        if(not self.is_binned):
+            self.set_values([factor * x for x in self.get_values()])
+        else:
+            self.set_values([(factor * x[0], factor * x[1])
+                             for x in self.get_values()])
+
+        for unc in self.uncertainties:
+            unc.scale_values(factor)
+
+    def add_qualifier(self, name, value, units=""):
+        qualifier = {}
+        qualifier["name"] = name
+        qualifier["value"] = value  # if type(value) == str else float(value)
+        if units:
+            qualifier["units"] = units
+        self.qualifiers.append(qualifier)
+
+    def make_dict(self):
+        tmp = {}
+        tmp["header"] = {"name": self.name, "units": self.units}
+
+        if self.qualifiers:
+            tmp["qualifiers"] = self.qualifiers
+
+        tmp["values"] = []
+
+        for i in range(len(self.values)):
+            valuedict = defaultdict(list)
+
+            if self.is_binned:
+                valuedict["low"] = relative_round(
+                    self.values[i][0], self.digits)
+                valuedict["high"] = relative_round(
+                    self.values[i][1], self.digits)
+            else:
+                valuedict["value"] = relative_round(
+                    self.values[i], self.digits)
+
+            for unc in self.uncertainties:
+                if unc.is_symmetric:
+                    valuedict['errors'].append({"symerror": relative_round(unc.values[i], self.digits),
+                                                "label": unc.label})
+                else:
+                    valuedict['errors'].append({"asymerror": {"minus": relative_round(unc.values[i][0], self.digits),
+                                                              "plus": relative_round(unc.values[i][1], self.digits)},
+                                                "label": unc.label})
+            tmp["values"].append(valuedict)
+        return tmp
+
+
+class Table(object):
+    """
+    A table is a collection of variables.
+
+    It also holds meta-data such as a general description,
+    the location within the paper, etc.
+    """
+
+    def __init__(self, name):
+        self.name = name
+        self.variables = []
+        self.description = "Example description"
+        self.location = "Example location"
+        self.keywords = {}
+        self.additional_resources = []
+
+    def add_image(self, file_name, outdir):
+        """Add an image including thumbnail to the table."""
+        if not os.path.isfile(file_name):
+            raise RuntimeError("File %s does not exist!" % file_name)
+        if not os.path.exists(outdir):
+            os.makedirs(outdir)
+        out_file_name = "{}.png".format(os.path.splitext(file_name)[0].rsplit("/", 1)[1])
+        thumb_out_file_name = "thumb_"+out_file_name
+        # first convert to png, then create thumbnail
+        command = "convert -flatten -fuzz 1% -trim +repage {} {}/{}".format(file_name, outdir, out_file_name)
+        execute_command(command)
+        command = "convert -thumbnail 240x179 {}/{} {}/{}".format(outdir, out_file_name, outdir, thumb_out_file_name)
+        execute_command(command)
+        image = {}
+        image["description"] = "Image file"
+        image["location"] = out_file_name
+        thumbnail = {}
+        thumbnail["description"] = "Thumbnail image file"
+        thumbnail["location"] = thumb_out_file_name
+        self.additional_resources.append(image)
+        self.additional_resources.append(thumbnail)
+
+
+    def add_variable(self, variable):
+        """Add a variable to the table"""
+        self.variables.append(variable)
+
+    def write_yaml(self, outdir="."):
+        """Write the table (and all its variables) to a YAML file."""
+        # Put all variables together into a table and write
+        table = {}
+        table["independent_variables"] = []
+        table["dependent_variables"] = []
+        for var in self.variables:
+            table["independent_variables" if var.is_independent else "dependent_variables"].append(
+                var.make_dict())
+
+        if not os.path.exists(outdir):
+            os.makedirs(outdir)
+
+        shortname = self.name.lower().replace(" ", "_")
+        outfile_path = os.path.join(
+            outdir, '{NAME}.yaml'.format(NAME=shortname))
+        with open(outfile_path, 'w') as outfile:
+            yaml.dump(table, outfile, default_flow_style=False)
+
+        # Add entry to central submission file
+        submission_path = os.path.join(outdir, 'submission.yaml')
+        with open(submission_path, 'a+') as submissionfile:
+
+            submission = {}
+            submission["name"] = self.name
+            submission["description"] = self.description
+            submission["location"] = self.location
+            submission["data_file"] = '{NAME}.yaml'.format(NAME=shortname)
+            submission["keywords"] = []
+            if self.additional_resources:
+                submission["additional_resources"] = self.additional_resources
+
+            for name, values in list(self.keywords.items()):
+                submission["keywords"].append({"name": name, "values": values})
+
+            if(len(submissionfile.read())):
+                submissionfile.write("---\n")
+            yaml.dump(submission, submissionfile, default_flow_style=False, explicit_start=True)
+        return os.path.basename(outfile_path)
+
+
+class Submission(object):
+    """
+    Top-level object of a HEPData submission.
+
+    Holds all the lower-level objects and controls writing.
+    """
+
+    def __init__(self):
+        self.tables = []
+        self.comment = ""
+        self.additional_resources = []
+        self.record_ids = []
+
+    def get_license(self):
+        data_license = {}
+        data_license["name"] = "cc-by-4.0"
+        data_license["url"] = "https://creativecommons.org/licenses/by/4.0/"
+        data_license["description"] = "The content can be shared and adapted but you must give appropriate credit and cannot restrict access to others."
+        return data_license
+
+    def add_table(self, table):
+        self.tables.append(table)
+
+    def add_link(self, description, location):
+        # should check for working URL
+        link = {}
+        link["description"] = description
+        link["location"] = location
+        self.additional_resources.append(link)
+
+    def add_record_id(self, r_id, r_type):
+        # should add some type checks
+        record_id = {}
+        record_id["id"] = int(r_id)
+        record_id["type"] = r_type
+        self.record_ids.append(record_id)
+
+    def read_abstract(self, filepath):
+        with open(filepath) as afile:
+            raw = str(afile.read())
+        raw = raw.replace("\r\n", "")
+        raw = raw.replace("\n", "")
+
+        self.comment = raw
+
+    def create_files(self, outdir="."):
+        if not os.path.exists(outdir):
+            os.makedirs(outdir)
+
+        # Write general info about submission
+        submission = {}
+        submission["data_license"] = self.get_license()
+        submission["comment"] = self.comment
+
+        if self.additional_resources:
+            submission["additional_resources"] = self.additional_resources
+        if self.record_ids:
+            submission["record_ids"] = self.record_ids
+
+        with open(os.path.join(outdir, 'submission.yaml'), 'w') as outfile:
+            yaml.dump(submission, outfile, default_flow_style=False, explicit_start=True)
+
+        # Write all the tables
+        for table in self.tables:
+            table.write_yaml(outdir)
+
+        # Put everything into a tarfile
+        import tarfile
+        tar = tarfile.open("submission.tar.gz", "w:gz")
+        for f in find_all_matching(outdir, "*.yaml"):
+            tar.add(f)
+        for f in find_all_matching(outdir, "*.png"):
+            tar.add(f)
+        tar.close()
+
+
+class Uncertainty(object):
+    """
+    Store information about an uncertainty on a variable
+
+    Uncertainties can be symmetric or asymmetric.
+    The main information is stored as one (two) lists in the symmetric (asymmetric) case.
+    The list entries are the uncertainty for each of the list entries in the corresponding Variable.
+    """
+
+    def __init__(self, label, is_symmetric=True):
+        self.label = label
+        self.is_symmetric = is_symmetric
+        self.values = []
+
+    def set_values(self, values, nominal=None):
+        """
+        Setter method
+
+        Can perform list subtraction relative to nominal value.
+        """
+        if(nominal):
+            tmp = []
+            for (down, up), nominal in zip(values, nominal):
+                tmp.append((float(down - nominal), float(up - nominal)))
+            self._values = tmp
+        else:
+            if(not self.is_symmetric):
+                try:
+                    assert(all([x[1] >= 0 for x in values]))
+                    assert(all([x[0] <= 0 for x in values]))
+                except AssertionError:
+                    raise ValueError(
+                        "Uncertainty::set_values: Wrong signs detected! First element of uncertainty tuple should be <=0, second >=0.")
+                self._values = [(float(x[0]), float(x[1])) for x in values]
+            else:
+                self._values = values
+
+
+    def get_values(self):
+        return self._values
+    values = property(get_values, set_values)
+
+    def scale_values(self, factor):
+        """Multiply each value by constant factor."""
+        if(self.is_symmetric):
+            self.set_values([factor * x for x in self.get_values()])
+        else:
+            self.set_values([(factor * x[0], factor * x[1])
+                             for x in self.get_values()])
+
+
+class RootFileReader(object):
+    """Easily extract information from ROOT histograms, graphs, etc"""
+
+    def __init__(self, tfile):
+        self.set_file(tfile)
+
+    def __del__(self):
+        if(self.tfile):
+            self.tfile.Close()
+
+    def set_file(self, tfile):
+        """Define the TFile we should read from."""
+        if(type(tfile) == str):
+            if(os.path.exists(tfile) and tfile.endswith(".root")):
+                self.tfile = r.TFile(tfile)
+            else:
+                raise IOError("RootReader: File does not exist: " + tfile)
+        elif(type(tfile) == r.TFile):
+            self.tfile = tfile
+        else:
+            raise ValueError(
+                "RootReader: Encountered unkonown type of variable passed as tfile argument: " + type(tfile))
+
+        if(not self.tfile):
+            raise IOError("RootReader: File not opened properly.")
+
+    def retrieve_object(self,path_to_object):
+        """
+        Generalized function to retrieve a TObject from a file.
+
+        There are two use cases:
+        1)  The object is saved under the exact path given.
+            In this case, the function behaves identically to TFile::Get.
+
+        2)  The object is saved as a primitive in a TCanvas.
+            In this case, the path has to be formatted as
+            PATH_TO_CANVAS/NAME_OF_PRIMITIVE
+        """
+        obj = self.tfile.Get(path_to_object)
+
+        # If the Get operation was successful, just return
+        # Otherwise, try canvas approach
+        if(obj):
+            return obj
+        else:
+            parts = path_to_object.split("/")
+            path_to_canvas = "/".join(parts[0:-1])
+            name = parts[-1]
+
+            try:
+                canv = self.tfile.Get(path_to_canvas)
+                assert(canv)
+                for entry in list(canv.GetListOfPrimitives()):
+                    if(entry.GetName() == name):
+                        return entry
+
+                # Didn't find anything. Print available primitives to help user debug.
+                print("Available primitives in TCanvas '{0}':".format(path_to_canvas))
+                for entry in list(canv.GetListOfPrimitives()):
+                    print("Name: '{0}', Type: '{1}'.".format(entry.GetName(),type(entry)))
+                assert(False)
+
+            except AssertionError:
+                raise IOError("Cannot find any object in file {0} with path {1}".format(self.tfile,path_to_object))
+
+    def read_graph(self, path_to_graph):
+        """Extract lists of X and Y values from a TGraph."""
+        graph = self.retrieve_object(path_to_graph)
+        return get_graph_points(graph)
+
+
+
+    def read_hist_2d(self,path_to_hist):
+        hist = self.retrieve_object(path_to_hist)
+        return get_hist_2d_points(hist)
+
+
+    def read_tree(self, path_to_tree, branchname):
+        """Extract a list of values from a tree branch."""
+        tree = self.tfile.Get(path_to_tree)
+
+        values = []
+        for event in tree:
+            values.append(getattr(event, branchname))
+        return values
+
+    def read_limit_tree(self, path_to_tree="limit", branchname_x="mh", branchname_y="limit"):
+        # store in multidimensional numpy array
+        tree = self.tfile.Get(path_to_tree)
+        points = int(tree.GetEntries()/6)
+        values = np.empty((points,7))
+        limit_values = []
+        actual_index = 0
+        for index, event in enumerate(tree):
+            limit_values.append(getattr(event, branchname_y))
+            # every sixth event starts a new limit value
+            if (index % 6 == 5):
+                x_value = getattr(event, branchname_x)
+                values[actual_index] = [x_value]+limit_values
+                limit_values = []
+                actual_index += 1
+        return values
+
+
+def get_hist_2d_points(hist):
+    points = defaultdict(list)
+    for ix in range(1,hist.GetNbinsX()+1):
+        x = hist.GetXaxis().GetBinCenter(ix)
+        for iy in range(1,hist.GetNbinsY()+1):
+            y = hist.GetYaxis().GetBinCenter(iy)
+            z = hist.GetBinContent(ix,iy)
+            points["x"].append(x)
+            points["y"].append(y)
+            points["z"].append(z)
+    return points
+
+def get_graph_points(graph):
+    """
+    Extract lists of X and Y values from a TGraph.
+
+    A dictionary is returned with the following key-value pairs:
+
+    key "x" -> value list of x values
+    key "y" -> value list of y values
+
+    If the input graph is a TGraphErrors (TGraphAsymmErrors), the dictionary also contains
+
+
+    key "dx" -> value list of x uncertainties (tuple of lower, upper uncertainty)
+    key "dy" -> value list of y uncertainties (tuple of lower, upper uncertainty)
+
+    """
+
+    # Check input
+    if( type(graph) not in [r.TGraph, r.TGraphErrors, r.TGraphAsymmErrors]):
+        raise TypeError("Expected to input to be TGraph or similar, instead got '{0}'".format(type(graph)))
+
+    # Extract points
+    points = defaultdict(list)
+
+    for i in range(graph.GetN()):
+        x = r.Double()
+        y = r.Double()
+        graph.GetPoint(i, x, y)
+        points["x"].append(float(x))
+        points["y"].append(float(y))
+        if(type(graph)==r.TGraphErrors):
+            points["dx"].append(graph.GetErrorX(i))
+            points["dy"].append(graph.GetErrorY(i))
+        elif(type(graph)==r.TGraphAsymmErrors):
+            points["dx"].append((-graph.GetErrorXlow(i),graph.GetErrorXhigh(i)))
+            points["dy"].append((-graph.GetErrorYlow(i),graph.GetErrorYhigh(i)))
+
+    return points
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 00000000..9886c7c6
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,6 @@
+[bdist_wheel]
+# This flag says to generate wheels that support both Python 2 and Python
+# 3. If your code will not run unchanged on both Python 2 and 3, you will
+# need to generate separate wheels for each Python version that you
+# support.
+universal=1
diff --git a/setup.py b/setup.py
new file mode 100644
index 00000000..79d90c8e
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,38 @@
+from setuptools import setup, find_packages
+from codecs import open
+from os import path
+
+deps = [
+      'numpy',
+      'PyYAML'
+]
+
+here = path.abspath(path.dirname(__file__))
+
+with open(path.join(here, 'README.md'), encoding='utf-8') as f:
+    long_description = f.read()
+
+setup(name='hepdata_lib',
+      version='0.1',
+      description='Library for getting your data into HEPData',
+      long_description=long_description,
+      long_description_content_type='text/markdown',
+      url='http://github.com/clelange/hepdata_lib',
+      author='Clemens Lange',
+      author_email='clemens.lange@cern.ch',
+      classifiers=[
+        'Development Status :: 3 - Alpha',
+        'License :: OSI Approved :: MIT License',
+        'Programming Language :: Python :: 2',
+        'Programming Language :: Python :: 2.7',
+        'Programming Language :: Python :: 3',
+      ],
+      keywords='HEPData physics OpenData',
+      packages=find_packages(exclude=['contrib', 'docs', 'tests']),
+      zip_safe=False,
+      install_requires=deps,
+      project_urls={
+        'Bug Reports': 'http://github.com/clelange/hepdata_lib/issues',
+        'Source': 'http://github.com/clelange/hepdata_lib',
+      },
+      )