From b32e9d78dbaee37f22bdd2b7d21a4216347ffbe6 Mon Sep 17 00:00:00 2001 From: Nathaniel Freeman Date: Fri, 1 May 2020 11:02:52 -0400 Subject: [PATCH] adds Dockerfile for proxy and documents its use (#20) --- DOCKER.md | 134 +++++++++++++++++++++++++++++++++++++++++++++++++++++ Dockerfile | 78 +++++++++++++++++++++++++++++++ README.md | 4 ++ 3 files changed, 216 insertions(+) create mode 100644 DOCKER.md create mode 100644 Dockerfile diff --git a/DOCKER.md b/DOCKER.md new file mode 100644 index 0000000..544a4dd --- /dev/null +++ b/DOCKER.md @@ -0,0 +1,134 @@ + + +# accumulo-proxy-docker +This documentation covers how to stand up [accumulo-proxy](https://github.com/apache/accumulo-proxy/) within a Docker container. + +The guide covers: +* Building the image +* Configuring the `proxy.properties` file +* Selecting an appropriate networking choice +* Starting and stopping the container +* Basic troubleshooting tips + +Using this guide for a production instance of accumulo-proxy is not recommended at this time. + +## Build the image +First you will need the accumulo-proxy tarball; building it is documented in the [README.md](README.md), but for simplicity run: +```commandline +mvn clean package -Ptarball +``` + +Once you have the tarball (it should be in the `./target/` folder), invoke the Docker build command to create a container image. +```commandline +docker build -t accumulo-proxy:latest . +``` + +## Default Configuration and Quickstart +By default, the container image expects the following to be true: +1. Your Accumulo instance name is "myinstance" +2. 
Your ZooKeeper is available (and reachable from the container) at localhost:2181 + +You can start the proxy using: +```commandline +docker run --rm -d -p 42424:42424 --network="host" --name accumulo-proxy accumulo-proxy:latest; +``` + +## Custom proxy.properties +If you need a customised `proxy.properties` configuration, volume mount your own copy of the file over the default one when you invoke the `docker run` command, for example: +```commandline +docker run --rm -d -p 42424:42424 -v /path/to/proxy.properties:/opt/accumulo-proxy/conf/proxy.properties --network="host" --name accumulo-proxy accumulo-proxy:latest +``` + +## Networking configuration +Container networking can be a very specialised subject, so we document two common practices that should cover the majority of use cases for development. + +The proxy container must be able to access both Accumulo and ZooKeeper. + +The ZooKeeper location can be configured in the `conf/proxy.properties` file, so you can override this to an acceptable value (see the "Custom proxy.properties" section). + +In order to communicate with Accumulo the container will need to be able to resolve the FQDN that the servers have registered in ZooKeeper. If using [fluo-uno](https://github.com/apache/fluo-uno) this is very likely the hostname of your development environment. We'll call this host `my.host.com`, with IP `192.168.0.1`, for the rest of this document. + +### Host networking + +Host networking is the simplest mechanism but generally only works for Linux hosts where Docker has been installed on 'bare metal', e.g. through an RPM. 
+ +You can test if this will work for you by executing the following steps: + +Start the accumulo-proxy container and enter it: +```commandline +docker run -it --rm -p 42424:42424 --network="host" --name accumulo-proxy accumulo-proxy:latest bash; +``` + +Once inside the container, execute the curl command to attempt to connect to the monitor webserver: +```commandline +curl my.host.com:9995 +``` + +If the terminal returns an error such as: +``` +curl: (7) Failed to connect to my.host.com 9995: Connection refused +``` +then your container cannot see the host, and you will need to look at the next section (Non-Host networking). + +If you receive the HTML for the monitor web page then host networking will work for you and you can add `--network="host"` to each Docker command going forward. + +An example of using host networking: +```commandline +docker run --rm -d -p 42424:42424 --network="host" --name accumulo-proxy accumulo-proxy:latest +``` + +Note: You do not need to map your ports (`-p`) if using host networking (Docker ignores published ports in this mode), but we include it for clarity. + +For more details see the official Docker documentation: [Use host Networking](https://docs.docker.com/network/host) + +### Non-Host networking +If you run outside of a single-node Linux installation (e.g. Docker for Mac, Docker for Windows, or a VM used to isolate your Docker engine) then you will likely need to take this path. + +Docker allows you to supply additional addresses to be resolved by the container; Docker automatically adds these to the container's `/etc/hosts` file. + +For each host, add a `--add-host FQDN:IP` entry to your `docker run` command. You can add multiple entries if you need to; see the official docs covering [network settings](https://docs.docker.com/engine/reference/run/#network-settings). 
+ +An example of using this approach: + +```commandline +docker run --rm -d -p 42424:42424 --add-host "my.host.com:192.168.0.1" --name accumulo-proxy accumulo-proxy:latest +``` + +## Cleanup +Once completed you should stop and remove the container. Containers started with `--rm`, as in the examples in this guide, are removed automatically when they are stopped, so the `docker rm` step is only needed if you omitted that flag. +```commandline +docker stop accumulo-proxy; +docker rm accumulo-proxy; +``` + +## Troubleshooting +It can often be difficult to know where to start with troubleshooting inside containers. If you need to enter the container without starting the proxy, we support this: +```commandline +docker run -it --rm -p 42424:42424 --network="host" --name accumulo-proxy accumulo-proxy:latest bash +``` + +The container is very slim, so if need be you can add additional tools using `apt`. + +If you wish to manually execute the accumulo-proxy in the container you can: +```commandline +/opt/accumulo-proxy/bin/accumulo-proxy -p /opt/accumulo-proxy/conf/proxy.properties +``` + +Some resources for additional help: +* [Main Accumulo Website](https://accumulo.apache.org/) +* [Contact Us page](https://accumulo.apache.org/contact-us/) \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..770e938 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,78 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+FROM openjdk:8
+
+EXPOSE 42424
+
+WORKDIR /opt/accumulo-proxy
+
+ARG HADOOP_VERSION=3.2.1
+ARG ZOOKEEPER_VERSION=3.5.7
+ARG ACCUMULO_VERSION=2.0.0
+ARG ACCUMULO_PROXY_VERSION=2.0.0-SNAPSHOT
+
+# SHA-1 digests that the downloaded tarballs are verified against (see download_bin below).
+ARG HADOOP_HASH=a57962a24d178193349917730bf95cdc99bde9df
+ARG ZOOKEEPER_HASH=619928c8553b62775119e3d7d143a4714a160365
+ARG ACCUMULO_HASH=b72bf5c3dcaa25387933a032925046234f30e17a
+
+# Download from Apache mirrors instead of archive #9. The URLs are tried in order; if the version
+# is outdated (or we're grabbing the .asc file), we might have to pull from the dist/archive :/
+ENV APACHE_DIST_URLS="https://www.apache.org/dyn/closer.cgi?action=download&filename= \
+  https://www-us.apache.org/dist/ \
+  https://www.apache.org/dist/ \
+  https://archive.apache.org/dist/"
+
+# Download, verify and install the dependencies into /opt/ in ONE layer and delete the tarballs at the end
+# (removing them in a later layer would not shrink the image). The sed adds ${ZOOKEEPER_HOME}/lib/* to accumulo-env.sh.
+RUN set -eux; \
+  download_bin() { \
+    local f="$1"; shift; \
+    local hash="$1"; shift; \
+    local distFile="$1"; shift; \
+    local success=; \
+    local distUrl=; \
+    for distUrl in ${APACHE_DIST_URLS}; do \
+      # Accept a mirror only if the download succeeds AND its checksum matches; otherwise try the next one.
+      if wget -nv -O "/tmp/${f}" "${distUrl}${distFile}" \
+          && echo "${hash}  /tmp/${f}" | sha1sum -c -; then \
+        success=1; \
+        break; \
+      fi; \
+    done; \
+    [ -n "${success}" ]; \
+  }; \
+  download_bin "apache-zookeeper.tar.gz" "${ZOOKEEPER_HASH}" "zookeeper/zookeeper-${ZOOKEEPER_VERSION}/apache-zookeeper-${ZOOKEEPER_VERSION}-bin.tar.gz"; \
+  download_bin "hadoop.tar.gz" "${HADOOP_HASH}" "hadoop/core/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz"; \
+  download_bin "accumulo.tar.gz" "${ACCUMULO_HASH}" "accumulo/${ACCUMULO_VERSION}/accumulo-${ACCUMULO_VERSION}-bin.tar.gz"; \
+  tar xzf /tmp/hadoop.tar.gz -C /opt/; ln -s "/opt/hadoop-${HADOOP_VERSION}" /opt/hadoop; \
+  tar xzf /tmp/apache-zookeeper.tar.gz -C /opt/; ln -s "/opt/apache-zookeeper-${ZOOKEEPER_VERSION}-bin" /opt/apache-zookeeper; \
+  tar xzf /tmp/accumulo.tar.gz -C /opt/; ln -s "/opt/accumulo-${ACCUMULO_VERSION}" /opt/accumulo; sed -i 's/\${ZOOKEEPER_HOME}\/\*/\${ZOOKEEPER_HOME}\/\*\:\${ZOOKEEPER_HOME}\/lib\/\*/g' /opt/accumulo/conf/accumulo-env.sh; \
+  rm -f /tmp/hadoop.tar.gz /tmp/apache-zookeeper.tar.gz /tmp/accumulo.tar.gz
+
+ENV HADOOP_HOME=/opt/hadoop \
+    ZOOKEEPER_HOME=/opt/apache-zookeeper \
+    ACCUMULO_HOME=/opt/accumulo
+
+# Add the proxy binary
+COPY target/accumulo-proxy-${ACCUMULO_PROXY_VERSION}-bin.tar.gz /tmp/
+RUN tar xzf /tmp/accumulo-proxy-${ACCUMULO_PROXY_VERSION}-bin.tar.gz -C /opt/accumulo-proxy --strip 1
+ENV ACCUMULO_PROXY_HOME=/opt/accumulo-proxy
+
+# Ensure Accumulo is on the path.
+ENV PATH="${PATH}:${ACCUMULO_HOME}/bin"
+
+CMD ["/opt/accumulo-proxy/bin/accumulo-proxy", "-p", "/opt/accumulo-proxy/conf/proxy.properties"]
diff --git a/README.md b/README.md index a2a9120..8f56222 100644 --- a/README.md +++ b/README.md @@ -41,6 +41,10 @@ Thrift language binding). ./bin/accumulo-proxy -p conf/proxy.properties ``` +# Docker Environment + +The Accumulo Proxy can also now be packaged and started in a Docker container, see the [DOCKER.md](DOCKER.md) for full details. + # Build language specific bindings Bindings have been built in `src/main/` for Java, Python, and Ruby.