diff --git a/.github/workflows/ci-build.yml b/.github/workflows/ci-build.yml index a9e858cd..62312dce 100644 --- a/.github/workflows/ci-build.yml +++ b/.github/workflows/ci-build.yml @@ -28,3 +28,25 @@ jobs: with: github-token: ${{ secrets.GITHUB_TOKEN }} format: jacoco + + docker-build: + needs: [ build ] + runs-on: ubuntu-latest + + steps: + - name: Free up disk space + run: sudo rm -rf /usr/share/dotnet && sudo rm -rf /opt/ghc && sudo rm -rf "/usr/local/share/boost" && sudo rm -rf "$AGENT_TOOLSDIRECTORY" + - uses: actions/checkout@v2 + - name: Build and push + id: docker_build + uses: mr-smithers-excellent/docker-build-push@v5 + with: + dockerfile: Dockerfile.local + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + image: lfoppiano/grobid-quantities + registry: docker.io + pushImage: ${{ github.event_name != 'pull_request' }} + tags: latest-develop + - name: Image digest + run: echo ${{ steps.docker_build.outputs.digest }} \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index e643b476..26fa4e6f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,55 +4,104 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). -## [Unreleased] +## [0.8.0] -## [0.7.1] – 2021-09-06 +### Added + ++ Docker image snapshots are built and pushed to Docker Hub at each commit ++ New Dockerfile.local that does not clone from GitHub + +### Changed + ++ Updated to Grobid version 0.8.0 ++ Updated to Dropwizard version 4.x (from version 1.x) + +## [0.7.3] – 2023-06-26 + +### Added + ++ Added additional units in the lexicon ++ Added missing log when exceptions are raised ++ Introduced Kotlin for new development + +### Changed + ++ Upgrade to Grobid 0.7.3 and support for JDK > 11 ++ Updated Docker image to support JDK 17 and use the Gradle distribution script instead of the JAR directly ++ Transitioned from CircleCI to GitHub Actions + +### Fixed + ++ Fix notation lexicon #97 ++ Fix list and labelled sequence extraction with DL BERT models #153 ++ Improve recognition of composed units using sentence segmentation #155 #87 + +## [0.7.2] – 2023-01-20 + +### Added + ++ Create holdout set by @lfoppiano in #145 ++ Add additional DL and transformers models by @lfoppiano in #146 + +### Changed + ++ Update to Grobid 0.7.2 + +### Fixed + ++ Fix value parser's incorrect recognition by @lfoppiano in #141 + +## [0.7.1] – 2022-09-02 ### Added + + New BidLSTM_CRF models for quantities, values and units parsing #129 -+ Add docker image on hub.docker.com #142 -+ Update to Grobid 0.7.1 #137 ++ Add docker image on hub.docker.com #142 ++ Update to Grobid 0.7.1 #137 ### Changed + + Use the grobid sentence segmentation for the quantified object sentence splitting #138 ### Fixed -+ Fixes incorrect boxes colors #125 -+ Fixed lexicon #134 ++ Fixes incorrect boxes colors #125 ++ Fixed lexicon #134 ## [0.7.0] – 2021-08-06 ### Added + + Docker image #128 -+ Configurable number of parallel request ++ Configurable number of parallel requests + Various improvement in the unit normalisation and update of library Unit of measurement to version 2.x #95 ### Changed + + Retrained models with CRF + Grobid 0.7.0 #123 ### Fixed + + Coveralls build #127 + Fixed command line parameters #119 - - ## [0.6.0] – 2020-04-30 ### Added + + First official release -+ Extraction of quantities, units and values using CRF -+ Support for Text and PDF ++ Extraction of quantities, units and values using CRF ++ Support for
Text and PDF ### Changed -+ Added evaluation measurement and models ++ Added evaluation measurement and models ### Fixed - [Unreleased]: https://github.com/kermitt2/grobid/compare/0.6.0...HEAD + [0.6.0]: https://github.com/kermitt2/grobid/compare/0.6.0 diff --git a/Dockerfile.local b/Dockerfile.local new file mode 100644 index 00000000..28d82958 --- /dev/null +++ b/Dockerfile.local @@ -0,0 +1,121 @@ +## Docker GROBID-quantities image using deep learning models and/or CRF models, and various Python modules +## Borrowed from https://github.com/kermitt2/grobid/blob/master/Dockerfile.delft +## See https://grobid.readthedocs.io/en/latest/Grobid-docker/ + +## usage example with grobid: https://github.com/kermitt2/grobid/blob/master/Dockerfile.delft + +## docker build -t lfoppiano/grobid-quantities:0.7.0 --build-arg GROBID_VERSION=0.7.0 --file Dockerfile.local . + +## no GPU: +## docker run -t --rm --init -p 8060:8060 -p 8061:8061 -v config.yml:/opt/grobid/grobid-quantities/resources/config/config.yml:ro lfoppiano/grobid-quantities:0.7.1 + +## allocate all available GPUs (only Linux with proper nvidia driver installed on host machine): +## docker run --rm --gpus all --init -p 8060:8060 -p 8061:8061 -v config.yml:/opt/grobid/grobid-quantities/resources/config/config.yml:ro lfoppiano/grobid-quantities:0.7.1 + +# ------------------- +# build builder image +# ------------------- + +FROM openjdk:17-jdk-slim as builder + +USER root + +RUN apt-get update && \ + apt-get -y --no-install-recommends install apt-utils libxml2 git unzip + +WORKDIR /opt/grobid + +RUN mkdir -p grobid-quantities-source grobid-home/models +COPY src grobid-quantities-source/src +COPY settings.gradle grobid-quantities-source/ +COPY resources/config/config-docker.yml grobid-quantities-source/resources/config/config.yml +COPY resources/models grobid-quantities-source/resources/models +COPY resources/clearnlp/models/* grobid-quantities-source/resources/clearnlp/models/ +COPY build.gradle grobid-quantities-source/ +COPY gradle.properties grobid-quantities-source/ +COPY gradle grobid-quantities-source/gradle/ +COPY gradlew grobid-quantities-source/ +COPY .git grobid-quantities-source/.git +COPY localLibs grobid-quantities-source/localLibs + +# Preparing models +WORKDIR /opt/grobid/grobid-quantities-source +RUN rm -rf /opt/grobid/grobid-home/models/* +RUN ./gradlew clean assemble -x shadowJar --no-daemon --stacktrace --info +#RUN ./gradlew copyModels --info --no-daemon +RUN ./gradlew downloadTransformers --no-daemon --info --stacktrace && rm -f /opt/grobid/grobid-home/models/*.zip + +# Preparing distribution +WORKDIR /opt/grobid +RUN unzip -o /opt/grobid/grobid-quantities-source/build/distributions/grobid-quantities-*.zip -d grobid-quantities_distribution && mv grobid-quantities_distribution/grobid-quantities-* grobid-quantities + +WORKDIR /opt + +# ------------------- +# build runtime image +# ------------------- + +FROM grobid/grobid:0.7.3 as runtime + +# setting the locale is likely unnecessary, but set it to be safe +ENV LANG C.UTF-8 + +RUN apt-get update && \ + apt-get -y --no-install-recommends install git wget + +WORKDIR /opt/grobid + +RUN mkdir -p /opt/grobid/grobid-quantities/resources/clearnlp/models /opt/grobid/grobid-quantities/resources/clearnlp/config +COPY --from=builder /opt/grobid/grobid-home/models ./grobid-home/models +COPY --from=builder /opt/grobid/grobid-quantities ./grobid-quantities/ +COPY --from=builder /opt/grobid/grobid-quantities-source/resources/config/config.yml ./grobid-quantities/resources/config/ +COPY --from=builder
/opt/grobid/grobid-quantities-source/resources/clearnlp/models/* ./grobid-quantities/resources/clearnlp/models/ + +VOLUME ["/opt/grobid/grobid-home/tmp"] + +RUN ln -s /opt/grobid/grobid-quantities/resources /opt/grobid/resources + +# JProfiler +#RUN wget https://download-gcdn.ej-technologies.com/jprofiler/jprofiler_linux_12_0_2.tar.gz -P /tmp/ && \ +# tar -xzf /tmp/jprofiler_linux_12_0_2.tar.gz -C /usr/local &&\ +# rm /tmp/jprofiler_linux_12_0_2.tar.gz + +WORKDIR /opt/grobid +ARG GROBID_VERSION +ENV GROBID_VERSION=${GROBID_VERSION:-latest} +ENV GROBID_QUANTITIES_OPTS "-Djava.library.path=/opt/grobid/grobid-home/lib/lin-64:/usr/local/lib/python3.8/dist-packages/jep --add-opens java.base/java.lang=ALL-UNNAMED" + +# This removes the fixed seed in DeLFT to increase the uncertainty +#RUN sed -i '/seed(7)/d' /usr/local/lib/python3.8/dist-packages/delft/utilities/Utilities.py +#RUN sed -i '/from numpy\.random import seed/d' /usr/local/lib/python3.8/dist-packages/delft/utilities/Utilities.py + +EXPOSE 8060 8061 5005 + +#CMD ["java", "-agentpath:/usr/local/jprofiler12.0.2/bin/linux-x64/libjprofilerti.so=port=8849", "-jar", "grobid-quantities/grobid-quantities-${GROBID_VERSION}-onejar.jar", "server", "grobid-quantities/config.yml"] +#CMD ["sh", "-c", "java -agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=0.0.0.0:5005 -jar grobid-quantities/grobid-quantities-${GROBID_VERSION}-onejar.jar server grobid-quantities/config.yml"] +#CMD ["sh", "-c", "java -jar grobid-quantities/grobid-quantities-${GROBID_VERSION}-onejar.jar server grobid-quantities/config.yml"] +CMD ["./grobid-quantities/bin/grobid-quantities", "server", "grobid-quantities/resources/config/config.yml"] + + +LABEL \ + authors="Luca Foppiano, Patrice Lopez" \ + org.label-schema.name="grobid-quantities" \ + org.label-schema.description="Docker image for grobid-quantities service" \ + org.label-schema.url="https://github.com/kermitt2/grobid-quantities" \ + org.label-schema.version=${GROBID_VERSION} + + +## Docker tricks: + +# - remove all stopped containers +# > docker rm $(docker ps -a -q) + +# - remove all unused images +# > docker rmi $(docker images --filter "dangling=true" -q --no-trunc) + +# - remove all untagged images +# > docker rmi $(docker images | grep "^<none>" | awk "{print $3}") + +# - "Cannot connect to the Docker daemon. Is the docker daemon running on this host?"
+# > docker-machine restart diff --git a/build.gradle b/build.gradle index 98f676b7..70ecd88f 100644 --- a/build.gradle +++ b/build.gradle @@ -341,6 +341,9 @@ publishing { def conf = new org.yaml.snakeyaml.Yaml().load(new File("resources/config/config.yml").newInputStream()) def grobidHome = conf.grobidHome.replace("\$", "").replace('{', "").replace("GROBID_HOME:- ", "").replace("}", "") +if (grobidHome.startsWith("../")) { + grobidHome = "${rootProject.rootDir}/${grobidHome}" +} /** Model management **/ @@ -354,7 +357,7 @@ task copyModels(type: Copy) { include "**/preprocessor.json" exclude "**/features-engineering/**" exclude "**/result-logs/**" - into "${rootDir}/${grobidHome}/models/" + into "${grobidHome}/models/" doLast { print "Copy models under grobid-home: ${grobidHome}" @@ -365,11 +368,11 @@ task downloadTransformers(dependsOn: copyModels) { doLast { download { src "https://transformers-data.s3.eu-central-1.amazonaws.com/quantities-transformers.zip" - dest "${rootDir}/${grobidHome}/models/quantities-transformers.zip" + dest "${grobidHome}/models/quantities-transformers.zip" overwrite false print "Download bulky transformers files under grobid-home: ${grobidHome}" } - ant.unzip(src: "${rootDir}/${grobidHome}/models/quantities-transformers.zip", dest: "${rootDir}/${grobidHome}/models/") + ant.unzip(src: "${grobidHome}/models/quantities-transformers.zip", dest: "${grobidHome}/models/") } } @@ -396,4 +399,4 @@ release { git { requireBranch.set('test') } -} \ No newline at end of file +} diff --git a/scripts/dataset_analysis_quantities.py b/scripts/dataset_analysis_quantities.py index 9580dd85..f9ceaf27 100644 --- a/scripts/dataset_analysis_quantities.py +++ b/scripts/dataset_analysis_quantities.py @@ -6,9 +6,8 @@ from pathlib import Path from bs4 import BeautifulSoup, NavigableString, Tag - -from grobid_superconductors.commons.grobid_tokenizer import tokenizeSimple -from grobid_superconductors.commons.quantities_tei_parser import get_children_list +from supermat.grobid_tokenizer import tokenizeSimple +from supermat.supermat_tei_parser import get_children_list def process_dir(input): @@ -53,7 +52,7 @@ def process_file(input): document_statistics['batch'] = batch - children = get_children_list(soup) + children = get_children_list(soup, use_paragraphs=True) for paragraph in children: for item in paragraph: diff --git a/scripts/quantities_tei_parser.py b/scripts/quantities_tei_parser.py index 5056feb1..21df6deb 100644 --- a/scripts/quantities_tei_parser.py +++ b/scripts/quantities_tei_parser.py @@ -1,18 +1,118 @@ import re +from collections import OrderedDict +from pathlib import Path +from typing import List from bs4 import BeautifulSoup, Tag, NavigableString +from supermat.supermat_tei_parser import tokenise -from .grobid_tokenizer import tokenizeSimple +ENTITY_TYPES = ['value', 'interval', 'range', 'list'] +def process_file_to_json(input_file_path): + with open(input_file_path, encoding='utf-8') as fp: + doc = fp.read() -def tokenise(string): - return tokenizeSimple(string) + mod_tags = re.finditer(r'(</\w+>) ', doc) + for mod in mod_tags: + doc = doc.replace(mod.group(), ' ' + mod.group(1)) + soup = BeautifulSoup(doc, 'xml') + output_document = OrderedDict() + output_document['doc_key'] = Path(str(input_file_path)).name + output_document['dataset'] = 'Quantities' + output_document['lang'] = 'en' -def get_children_list(soup, verbose=False): + output_document['level'] = 'paragraph' + paragraph_nodes = get_nodes(soup) + passages, relations = process_paragraphs(paragraph_nodes) + +
output_document['passages'] = passages + output_document['relations'] = relations + + return output_document + + +def get_nodes(soup, verbose=False): + children = soup.find_all("p") + if verbose: + print(str(children)) + return children + + +def process_paragraphs(paragraph_list: list) -> [List, List]: + """ + Process XML annotated with <p> and <s>, used both as paragraphs
and as sentences. + + Return two list passage (sentence or paragraph,spans and link) and relations (links at document-level) + """ + token_offset_sentence = 0 + ient = 1 + + passages = [] + relations = [] + + i = 0 + for paragraph_id, paragraph in enumerate(paragraph_list): + passage = OrderedDict() + + j = 0 + offset = 0 + tokens = [] + text_paragraph = '' + spans = [] + + passage['text'] = text_paragraph + passage['tokens'] = tokens + passage['type'] = 'paragraph' + passage['spans'] = spans + passage['id'] = paragraph_id + + for idx, item in enumerate(paragraph.contents): + if type(item) is NavigableString: + local_text = str(item).replace("\n", " ") + # We preserve spaces that are in the middle + if idx == 0 or idx == len(paragraph.contents) - 1: + local_text = local_text.strip() + text_paragraph += local_text + token_list = tokenise(local_text) + tokens.extend(token_list) + token_offset_sentence += len(token_list) + offset += len(local_text) + elif type(item) is Tag and item.name == 'measure' and 'type' in item.attrs and item.attrs['type'] in ENTITY_TYPES: + local_text = item.text + text_paragraph += local_text + span = OrderedDict() + front_offset = 0 + if local_text.startswith(" "): + front_offset = len(local_text) - len(local_text.lstrip(" ")) + + span['text'] = local_text.strip(" ") + span['offset_start'] = offset + front_offset + span['offset_end'] = offset + len(span['text']) + front_offset + spans.append(span) + + offset += len(local_text) + + assert text_paragraph[span['offset_start']:span['offset_end']] == span['text'] + + if 'type' not in item.attrs: + raise Exception("RS without type is invalid. Stopping") + token_list = tokenise(local_text) + tokens.extend(token_list) + + entity_class = item.attrs['type'] + span['type'] = entity_class + + span['token_start'] = token_offset_sentence + span['token_end'] = token_offset_sentence + len(token_list) - 1 + + j += 1 + + ient += 1 # entity No. 
+ + passage['text'] = text_paragraph + passages.append(passage) + i += 1 + return passages, relations diff --git a/scripts/requirements.txt b/scripts/requirements.txt index 3a6469d5..456dba20 100644 --- a/scripts/requirements.txt +++ b/scripts/requirements.txt @@ -1,6 +1,6 @@ beautifulsoup4 -blingfire lxml tqdm pyyaml -sklearn \ No newline at end of file +sklearn +supermat \ No newline at end of file diff --git a/scripts/xml2csv_entities.py b/scripts/xml2csv_entities.py new file mode 100644 index 00000000..4c32ce07 --- /dev/null +++ b/scripts/xml2csv_entities.py @@ -0,0 +1,139 @@ +import argparse +import csv +import os +from pathlib import Path + +from supermat.utils import get_in_paths_from_directory + +from quantities_tei_parser import process_file_to_json, ENTITY_TYPES + +paragraph_id = 'paragraph_id' + + +def write_output(output_path, data, header, format="csv"): + delimiter = '\t' if format == 'tsv' else ',' + fw = csv.writer(open(output_path, encoding='utf-8', mode='w'), delimiter=delimiter, quotechar='"') + fw.writerow(header) + fw.writerows(data) + + +def get_entity_data(data_sorted, remove_dups=False): + entities = [] + record_id = 0 + for passage in data_sorted['passages']: + text = passage['text'] + spans = [span['text'] for span in filter(lambda s: s['type'] in ENTITY_TYPES, passage['spans'])] + if remove_dups: + ents = list(set(spans)) + else: + ents = list(spans) + for ent in ents: + entities.append( + [ + record_id, + data_sorted['doc_key'], + passage['id'], + ent + ] + ) + record_id += 1 + + # entities.append( + # { + # "text": text, + # "entities": ents + # } + # ) + + return entities + + +def get_texts(data_sorted): + text_data = [[idx, data_sorted['doc_key'], data_sorted['passages'][idx]['id'], data_sorted['passages'][idx]['text']] + for idx in + range(0, len(data_sorted['passages']))] + + return text_data + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description="Converter XML (Supermat) to CSV for entity extraction (no relation information are used)") + + parser.add_argument("--input", + help="Input file or directory", + required=True) + parser.add_argument("--output", + help="Output directory", + required=True) + parser.add_argument("--recursive", + action="store_true", + default=False, + help="Process input directory recursively. 
If input is a file, this parameter is ignored.") + parser.add_argument("--entity-type", + default="quantity", + required=False, + help="Select which entity type to extract.") + + args = parser.parse_args() + + input = args.input + output = args.output + recursive = args.recursive + ent_type = args.entity_type + + if os.path.isdir(input): + path_list = get_in_paths_from_directory(input, "xml", recursive=recursive) + + entities_data = [] + texts_data = [] + for path in path_list: + print("Processing: ", path) + file_data = process_file_to_json(path) + # data = sorted(file_data, key=lambda k: k[paragraph_id]) + entity_data = get_entity_data(file_data, ent_type) + entities_data.extend(entity_data) + + text_data = get_texts(file_data) + texts_data.extend(text_data) + + if os.path.isdir(str(output)): + output_path_text = os.path.join(output, "output-text") + ".csv" + output_path_expected = os.path.join(output, "output-" + ent_type) + ".csv" + else: + parent_dir = Path(output).parent + output_path_text = os.path.join(parent_dir, "output-text" + ".csv") + output_path_expected = os.path.join(parent_dir, "output-" + ent_type + ".csv") + + header = ["id", "filename", "pid", ent_type] + + for idx, data in enumerate(entities_data): + data[0] = idx + + write_output(output_path_expected, entities_data, header) + + header = ["id", "filename", "pid", "text"] + for idx, data in enumerate(texts_data): + data[0] = idx + write_output(output_path_text, texts_data, header) + + elif os.path.isfile(input): + input_path = Path(input) + file_data = process_file_to_json(input_path) + output_filename = input_path.stem + + output_path_text = os.path.join(output, str(output_filename) + "-text" + ".csv") + texts_data = get_texts(file_data) + for idx, data in enumerate(texts_data): + data[0] = idx + + header = ["id", "filename", "pid", "text"] + write_output(output_path_text, texts_data, header) + + output_path_expected = os.path.join(output, str(output_filename) + "-" + ent_type + ".csv") + ent_data_no_duplicates = get_entity_data(file_data, ent_type) + for idx, data in enumerate(ent_data_no_duplicates): + data[0] = idx + + header = ["id", "filename", "pid", ent_type] + write_output(output_path_expected, ent_data_no_duplicates, header)
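For reference, a minimal usage sketch of the new parser module added in scripts/quantities_tei_parser.py, mirroring what scripts/xml2csv_entities.py does internally. Assumptions: the supermat package is installed, the snippet is run from the scripts/ directory, and corpus/example.tei.xml is a placeholder for any annotated Supermat/TEI XML file.

```python
# Minimal sketch (hypothetical input path); not part of the patch itself.
from pathlib import Path

from quantities_tei_parser import process_file_to_json, ENTITY_TYPES

# Parse one annotated XML file into the intermediate document structure
doc = process_file_to_json(Path("corpus/example.tei.xml"))

print(doc["doc_key"], doc["level"])  # source file name, 'paragraph'
for passage in doc["passages"]:
    # Each passage carries the reconstructed text plus character offsets
    # for every annotated <measure> span.
    for span in passage["spans"]:
        assert span["type"] in ENTITY_TYPES  # 'value', 'interval', 'range' or 'list'
        print(passage["id"], span["type"], repr(span["text"]),
              span["offset_start"], span["offset_end"])
```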