forked from idiap/asrt
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Dockerfile
40 lines (29 loc) · 864 Bytes
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# Base image for the ASRT data-preparation container.
FROM ubuntu:16.04

# System dependencies: C/C++ toolchain, git, Poppler (library headers and CLI
# tools), the Python 2.7 stack, and vim.
# `apt-get update` and `apt-get install` share one layer so the package index
# can never be stale relative to the install, and the index is removed in the
# same layer so it does not end up in the image.
# NOTE(review): recommended packages are still installed on purpose; switching
# to --no-install-recommends may drop packages that the HTTPS git clone and
# pip source builds rely on (e.g. ca-certificates, python-setuptools) - verify
# before trimming.
RUN apt-get update && \
    apt-get install -y \
        build-essential \
        git \
        libpoppler-cpp-dev \
        pkg-config \
        poppler-utils \
        python-dev \
        python-pip \
        python-roman \
        python2.7 \
        vim && \
    rm -rf /var/lib/apt/lists/*
# Clone the upstream ASRT sources; the checkout lands in /usr/local/asrt.
# NOTE(review): the clone is not pinned to a tag or commit, so rebuilding
# without cache may pick up a different upstream revision.
WORKDIR /usr/local
RUN git clone https://github.com/idiap/asrt.git

# Place the build context's requirements.txt inside the checkout and install
# the Python dependencies from it. COPY is used because this is a plain local
# file (no archive extraction or URL fetch is needed), and the destination is
# spelled out as a file path so it cannot be misread as a directory name.
# --no-cache-dir keeps pip's download cache out of the image layer.
WORKDIR /usr/local/asrt
COPY requirements.txt /usr/local/asrt/requirements.txt
RUN pip install --no-cache-dir -r requirements.txt
# Location of the NLTK corpora; NLTK consults the NLTK_DATA environment
# variable when searching for data, so this also applies at container runtime.
ENV NLTK_DATA=/usr/local/asrt/nltk_data

# Create the data directory and download the 'punkt' and 'europarl_raw'
# packages into it. The variable must be expanded ("$NLTK_DATA"): a bare
# NLTK_DATA would create a stray directory literally named "NLTK_DATA" in the
# working directory instead of the configured path.
RUN mkdir -p "$NLTK_DATA" && \
    python -m nltk.downloader punkt -d "$NLTK_DATA" && \
    python -m nltk.downloader europarl_raw -d "$NLTK_DATA"
# Runtime environment variables.
# The exec-form ENTRYPOINT below performs no variable expansion, so neither
# value is substituted into its argument list.
# NOTE(review): LANG is also the standard locale variable and "1" is not a
# locale name; the entrypoint hard-codes "-l 0" - confirm which is intended.
# NOTE(review): REGEX duplicates the "-r" path passed below - confirm whether
# the script reads it from the environment.
ENV LANG=1 \
    REGEX=examples/resources/regex.csv

# Run the data-preparation script with its fixed default options. The script
# and regex paths are relative, so they resolve against the image's working
# directory.
# Callers must still supply "-i <inputfile> -o <outputfolder>" and mount a
# volume that holds those paths.
ENTRYPOINT ["data-preparation/python/run_data_preparation.py", "-l", "0", "-r", "examples/resources/regex.csv", "-s", "-m"]