Skip to content

Commit

Permalink
chore: refactor devcontainer to debian image and skip aws-glue-libs
Browse files Browse the repository at this point in the history
  • Loading branch information
alvarosantossyngenta committed Feb 27, 2024
1 parent 5e8f273 commit 86da8c6
Show file tree
Hide file tree
Showing 6 changed files with 92 additions and 140 deletions.
116 changes: 76 additions & 40 deletions .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,66 +1,102 @@
ARG GLUE_IMAGE_VERSION=4.0.0_image_01
ARG PYTHON_VERSION=3.11

FROM amazon/aws-glue-libs:glue_libs_${GLUE_IMAGE_VERSION}
# FROM gradle:7.6.0 as gradle

ARG USERNAME=glue_user
ARG USER_UID=1000
ARG USER_GID=$USER_UID
ARG LOCAL_DEV=TRUE
# COPY build.gradle /tmp/

SHELL ["/bin/bash", "-c"]
# COPY .env /tmp/.env

USER root
# RUN bash -c ' \
# cd /tmp && export $(<.env grep -v "^#" | xargs) && gradle --no-daemon download \
# && ls -la /tmp/jars \
# '

FROM mcr.microsoft.com/devcontainers/python:${PYTHON_VERSION}-bullseye

ARG PYTHON_PROJECT_FOLDER="metorikku"
ARG FURY_URL="https://pypi.fury.io/syngenta-digital/"
ARG FURY_AUTH
ARG POETRY_VERSION=1.6.1
ARG USERNAME=vscode

# Environment defaults for the dev container.
ENV DEBIAN_FRONTEND=noninteractive
ENV AWS_REGION=eu-central-1
# Put the user's ~/.local/bin first on PATH. Written in key=value form; the
# space-separated `ENV key value` syntax is legacy.
ENV PATH="/home/${USERNAME}/.local/bin:$PATH"

RUN yum install -y curl \
git \
jq \
software-properties-common \
telnet \
unzip \
vim \
wget \
zip

# Install sudo
RUN yum install -y sudo \
&& echo $USERNAME ALL=\(root\) NOPASSWD:ALL > /etc/sudoers.d/$USERNAME \
&& chmod 0440 /etc/sudoers.d/$USERNAME

ADD .devcontainer/install_gnupg22.sh /usr/local/bin/
RUN test "$LOCAL_DEV" == "TRUE" && \
yum install -y \
gnupg2 && \
bash /usr/local/bin/install_gnupg22.sh || bash -c true
# Install base packages.
# apt-utils is installed first, on its own, before the main package set.
# The apt package lists are deleted in this same RUN so they never persist in
# an image layer; removing them in a later instruction would not shrink it.
RUN apt-get update \
    && apt-get -y install --no-install-recommends apt-utils 2>&1 \
    && apt-get -y install \
        curl \
        default-jdk \
        git \
        gnupg2 \
        icu-devtools \
        lsb-release \
        procps \
        python3 \
        python3-dev \
        python3-distutils \
        unzip \
        vim \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Install awscli (v2 bundle matching the build machine architecture).
# -f makes curl fail on an HTTP error instead of saving the error page as the
# zip. Absolute paths replace `cd`, and the download directory is removed in
# the same layer so the archive and unpacked installer do not stay in the image.
RUN mkdir -p /tmp/docker-downloads \
    && curl -fsSL "https://awscli.amazonaws.com/awscli-exe-linux-$(arch).zip" -o "/tmp/docker-downloads/awscliv2.zip" \
    && unzip -q /tmp/docker-downloads/awscliv2.zip -d /tmp/docker-downloads \
    && /tmp/docker-downloads/aws/install \
    && rm -rf /tmp/docker-downloads

USER $USERNAME
WORKDIR "/home/${USERNAME}"
ENV HOME="/home/${USERNAME}"
WORKDIR /home/${USERNAME}/

SHELL ["/bin/bash", "-c"]

# Fix MongoDB bug
RUN rm -f /home/${USERNAME}/spark/jars/*mongo* /home/${USERNAME}/spark/jars/*bson* \
/home/${USERNAME}/aws-glue-libs/jars/*mongo* /home/${USERNAME}/aws-glue-libs/jars/*bson*
# RUN mkdir -p /tmp/project && \
# cd /tmp/project && \
# touch README.md

# Install Java & related
RUN sudo yum install -y java-1.8.0-amazon-corretto-devel
# COPY pyproject.toml poetry.lock "${PYTHON_PROJECT_FOLDER}" /tmp/project/
# COPY "${PYTHON_PROJECT_FOLDER}/" "/tmp/project/${PYTHON_PROJECT_FOLDER}/"

# RUN curl -sSL https://install.python-poetry.org | python3 - --version "${POETRY_VERSION}" && \
# poetry config repositories.fury "${FURY_URL}" && \
# poetry config http-basic.fury "${FURY_AUTH}" "${FURY_AUTH}" && \
# cd /tmp/project/ && \
# poetry install && \
# rm -Rf /home/$USERNAME/.config/pypoetry

ADD .devcontainer/init_container.sh /usr/local/bin/

# # Install Metorikku libraries
# RUN mkdir /home/${USERNAME}/spark-extra-libs/
# COPY --from=gradle /tmp/jars /home/${USERNAME}/spark-extra-libs/
# RUN ls -la /home/${USERNAME}/spark-extra-libs \
# && cd /home/${USERNAME}/spark-extra-libs \
# && unzip service-java-data-pipelines-metorikku_*-assembly.jar "schemas/*.yaml"

# Install Java & related: bootstrap SDKMAN (rcupdate=false is passed to the
# installer URL; the .bashrc hook is added explicitly later in this file).
# The installer is downloaded to a file first instead of `curl | bash`, so a
# failed or truncated download (-f) aborts the build rather than being piped
# into the shell and silently ignored.
RUN curl -fsSL "https://get.sdkman.io?rcupdate=false" -o /tmp/sdkman-install.sh \
    && bash /tmp/sdkman-install.sh \
    && rm -f /tmp/sdkman-install.sh

ARG JAVA_VERSION="8.0.402-amzn"
ARG SCALA_VERSION="2.12.17"
ENV SCALA_BINARY_VERSION="2.12"
ARG SBT_VERSION="1.8.0"
ARG SPARK_VERSION="3.3.1"

# Install the JVM toolchain and Spark via SDKMAN. sdkman-init.sh is sourced
# first so the `sdk` command is available in this shell.
# `ls` runs before `rm -Rf` so the build fails if no guava jar matches the
# glob, instead of the forced removal silently doing nothing.
RUN source "${HOME}/.sdkman/bin/sdkman-init.sh" \
    && sdk install java "$JAVA_VERSION" \
    && sdk install scala "$SCALA_VERSION" \
    && sdk install sbt "$SBT_VERSION" \
    && sdk install spark "$SPARK_VERSION" \
    && ls -la "${HOME}/.sdkman/candidates/spark/current"/jars/guava-*.jar \
    && rm -Rf "${HOME}/.sdkman/candidates/spark/current"/jars/guava-*.jar
ADD src/main/resources/hive-site.xml "${HOME}/.sdkman/candidates/spark/current/conf"

# Other stuff
ADD .devcontainer/init_container.sh /usr/local/bin/
# Clean up
ENV DEBIAN_FRONTEND=dialog

ADD src/main/resources/hive-site.xml ${HOME}/spark/conf/hive-site.xml
USER root
# Final cleanup of temporary build directories and apt caches.
# This only tidies the final filesystem: files created by earlier instructions
# still occupy space in their own layers.
# The former leading bare `cd` was dropped; every path below is absolute.
RUN rm -rf /tmp/docker-downloads \
    && apt-get autoremove -y \
    && apt-get clean -y \
    && rm -rf /var/lib/apt/lists/* \
    && rm -Rf /tmp/project/

# Configure profile
USER $USERNAME

# Append the interactive-shell setup to ~/.bashrc in a single layer.
# The lines are single-quoted so ${PATH}/${HOME} are written literally and
# expanded when the shell starts, not at build time. SBT_OPTS is written once
# (it was previously appended twice with identical content).
RUN { \
        echo ''; \
        echo 'export PATH="${PATH}":${HOME}/.local/bin'; \
        echo 'export AWS_REGION=eu-central-1'; \
        echo '[[ -s "$HOME/.sdkman/bin/sdkman-init.sh" ]] && source "$HOME/.sdkman/bin/sdkman-init.sh"'; \
        echo 'export SBT_OPTS="-Xms512M -Xmx2G -Xss2M -XX:MaxMetaspaceSize=1G"'; \
    } >> "${HOME}/.bashrc"
10 changes: 5 additions & 5 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@
"dockerComposeFile": "docker-compose.yml",
"service": "main",
"workspaceFolder": "/workspace",
"initializeCommand": [
"bash",
"-c",
"docker pull ghcr.io/syngenta-digital/service-java-data-pipelines-metorikku:devcontainer"
],
// "initializeCommand": [
// "bash",
// "-c",
// "docker pull ghcr.io/syngenta-digital/service-java-data-pipelines-metorikku:devcontainer"
// ],
"postCreateCommand": [
"bash",
"/usr/local/bin/init_container.sh"
Expand Down
6 changes: 3 additions & 3 deletions .devcontainer/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@ services:
image: ghcr.io/syngenta-digital/service-java-data-pipelines-metorikku:devcontainer
environment:
<<: *default-environment
user: glue_user
command: -c "sleep infinity"
user: vscode
command: bash -c "sleep infinity"
volumes:
- ../:/workspace
- ${HOME}/.aws:/home/glue_user/.aws:ro
- ${HOME}/.aws:/home/vscode/.aws:ro
- ${HOME}/.gnupg/public.key:/tmp/.gnupg/public.key:ro
- ${HOME}/.gnupg/private.key:/tmp/.gnupg/private.key:ro
- ${HOME}/.ssh:/tmp/.ssh:ro
Expand Down
2 changes: 1 addition & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@
"files.watcherExclude": {
"**/target": true
},
"metals.javaHome": "/usr/lib/jvm/java-1.8.0-amazon-corretto.aarch64/"
"metals.javaHome": "/home/vscode/.sdkman/candidates/java/current"
}
13 changes: 7 additions & 6 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -71,13 +71,13 @@ libraryDependencies ++= Seq(
"com.fasterxml.jackson.datatype" % "jackson-datatype-jdk8" % jacksonVersion.value,
"com.fasterxml.jackson.datatype" % "jackson-datatype-jsr310" % jacksonVersion.value,
"com.fasterxml.jackson.module" %% "jackson-module-scala" % jacksonVersion.value,
"com.hubspot.jinjava" % "jinjava" % "2.7.1" excludeAll (excludeJackson),
"org.influxdb" % "influxdb-java" % "2.23",
"com.hubspot.jinjava" % "jinjava" % "2.7.1" excludeAll (excludeJackson),
"org.influxdb" % "influxdb-java" % "2.23",
"io.github.spark-redshift-community" %% "spark-redshift" % "6.2.0-spark_3.3" excludeAll (excludeAWS),
"com.segment.analytics.java" % "analytics" % "2.1.1" % "provided",
"com.datastax.spark" %% "spark-cassandra-connector" % "3.4.1" % "provided",
"com.redislabs" %% "spark-redis" % "3.1.0" % "provided",
"org.apache.kafka" %% "kafka" % "3.6.1" % "provided",
"com.segment.analytics.java" % "analytics" % "2.1.1" % "provided",
"com.datastax.spark" %% "spark-cassandra-connector" % "3.4.1" % "provided",
"com.redislabs" %% "spark-redis" % "3.1.0" % "provided",
"org.apache.kafka" %% "kafka" % "3.6.1" % "provided",
"za.co.absa" %% "abris" % "3.2.1" % "provided" excludeAll (excludeAvro, excludeSpark),
"org.apache.hudi" %% "hudi-spark-bundle" % "0.10.0" % "provided",
"org.apache.parquet" % "parquet-avro" % "1.12.3" % "provided",
Expand All @@ -99,6 +99,7 @@ libraryDependencies ++= Seq(
"org.apache.sedona" %% "sedona-viz-3.0" % "1.4.1" excludeAll (excludeSpark),
"org.datasyslab" % "geotools-wrapper" % "1.4.0-28.2" excludeAll (excludeSpark),
"com.amazonaws" % "aws-java-sdk-s3" % "1.12.642",
"com.amazonaws" % "aws-java-sdk-dynamodb" % "1.12.642",
"software.amazon.awssdk" % "dynamodb" % "2.23.8",
"software.amazon.awssdk" % "glue" % "2.23.8",
"software.amazon.awssdk" % "s3" % "2.23.8",
Expand Down
85 changes: 0 additions & 85 deletions examples/atlas/docker-compose.yml

This file was deleted.

0 comments on commit 86da8c6

Please sign in to comment.