From 6f042b4e7d4d84bf36475e0b5020ed8d2cf462ed Mon Sep 17 00:00:00 2001
From: Lee Yang
Date: Fri, 18 Dec 2020 08:50:22 -0800
Subject: [PATCH 1/3] migrate build from travis to screwdriver

---
 .travis.yml                         |  35 ---------
 docs/source/conf.py                 |   4 +-
 pom.xml                             |   2 +-
 requirements.txt                    |   2 +-
 screwdriver.yaml                    |  53 +++++++++++++
 scripts/install_spark.sh            |  11 +++
 scripts/start_spark.sh              |  12 +++
 scripts/stop_spark.sh               |   3 +
 scripts/travis_before_install.sh    |  28 -------
 sd.allow                            |   4 +
 setup.cfg                           |  73 ++++++++++++++++-
 setup.py                            |  51 ++++++------
 tensorflowonspark/__init__.py       |   2 +-
 {test => tests}/README.md           |   0
 {test => tests}/run_tests.sh        |   0
 {test => tests}/test.py             |   0
 {test => tests}/test_TFCluster.py   |   0
 {test => tests}/test_TFNode.py      |   0
 {test => tests}/test_TFSparkNode.py |   0
 {test => tests}/test_dfutil.py      |   0
 {test => tests}/test_pipeline.py    |   0
 {test => tests}/test_reservation.py |   0
 tox.ini                             | 118 ++++++++++++++++++++++++++++
 23 files changed, 302 insertions(+), 96 deletions(-)
 delete mode 100644 .travis.yml
 create mode 100644 screwdriver.yaml
 create mode 100644 scripts/install_spark.sh
 create mode 100644 scripts/start_spark.sh
 create mode 100755 scripts/stop_spark.sh
 delete mode 100644 scripts/travis_before_install.sh
 create mode 100644 sd.allow
 rename {test => tests}/README.md (100%)
 rename {test => tests}/run_tests.sh (100%)
 rename {test => tests}/test.py (100%)
 rename {test => tests}/test_TFCluster.py (100%)
 rename {test => tests}/test_TFNode.py (100%)
 rename {test => tests}/test_TFSparkNode.py (100%)
 rename {test => tests}/test_dfutil.py (100%)
 rename {test => tests}/test_pipeline.py (100%)
 rename {test => tests}/test_reservation.py (100%)
 create mode 100644 tox.ini

diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index d58b9785..00000000
--- a/.travis.yml
+++ /dev/null
@@ -1,35 +0,0 @@
-matrix:
-  include:
-  - language: python
-    python: 3.6
-    dist: xenial
-    before_install:
-      - source scripts/travis_before_install.sh
-    install:
-      - pip install -r requirements.txt
-      - pip list
-    script:
-      - sphinx-build -b html docs/source docs/build/html
-      - test/run_tests.sh
-notifications:
-  email: false
-deploy:
-- provider: pages
-  skip_cleanup: true
-  github_token: "$GITHUB_TOKEN"
-  local_dir: docs/build/html
-  on:
-    branch: master
-    python: 3.6
-    tags: true
-    condition: "$TRAVIS_TAG =~ ^v.*$"
-- provider: pypi
-  user: leewyang
-  password:
-    secure: T2Q8VM6SgcMtJDO2kJbaELE/5ICR5mx8pkM6TyNAJZ2Mr3fLIy6iDfPKunBAYVljl+SDEWmuoPTWqJdqMyo47LBKPKtBHbGzATqGSRTLvxLOYNSXUX+uCpPtr7CMp1eP3xpZ3YbAJZvoEFlWnBQKeBtX/PjNCpmKdp7ir+46CvR/pR1tcM5cFnSgU+uCPAMUt8KTZIxeRo+oJtaE0DM2RxLJ9nGnaRNz9fdXxwhViNj/bMnDRUI0G6k+Iy4sO2669si8nhTDr+Oq66ONUcJtAQymNUM/hzBTCkrJvuIq1TqTlKkA39UrtD5/wCkCqPUbCLVuIfNwkYfW2C8AlXcbphBKN4PhwaoL5XECr3/AOsgNpnPWhCF1Z1uLi58FhIlSyp+5c/x2wVJLZi2IE+c996An7UO3t16ZFpFEgzS6m9PVbi6Qil6Tl4AhV5QLKb0Qn0hLe2l0WixzK9KLMHfkqX8h5ZGC7i0TvCNcU2uIFjY8we91GORZKZhwUVDKbPqiUZIKn64Qq8EwJIsk/S344OrUTzm7z0lFCqtPphg1duU42QOFmaYWi6hgsbtDxN6+CubLw23G3PtKjOpNt8hHnrjZsz9H1MKbSAoYQ4fo+Iwb3owTjXnSTBr94StW7qysggWH6xQimFDh/SKOE9MfroMGt5YTXfduTbqyeameYqE=
-  distributions: sdist bdist_wheel
-  on:
-    branch: master
-    python: 3.6
-    tags: true
-    condition: "$TRAVIS_TAG =~ ^v.*$"
diff --git a/docs/source/conf.py b/docs/source/conf.py
index a1e0fb71..278373d5 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -28,9 +28,9 @@
 author = 'Yahoo Inc'
 
 # The short X.Y version
-version = '2.2.1'
+version = '2.2.2'
 # The full version, including alpha/beta/rc tags
-release = '2.2.1'
+release = '2.2.2'
 
 
 # -- General configuration ---------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 3c60e7e8..f59850f4 100644
--- a/pom.xml
+++ b/pom.xml
@@ -5,7 +5,7 @@
   <modelVersion>4.0.0</modelVersion>
   <groupId>com.yahoo.ml</groupId>
   <artifactId>tensorflowonspark</artifactId>
-  <version>2.2.0-SNAPSHOT</version>
+  <version>2.2.2-SNAPSHOT</version>
   <packaging>jar</packaging>
   <name>tensorflowonspark</name>
   <description>Spark Scala inferencing for TensorFlowOnSpark</description>
diff --git a/requirements.txt b/requirements.txt
index f8e22e6d..fbcc64fd 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,7 +2,7 @@ h5py>=2.9.0
 numpy>=1.14.0
 packaging
 py4j==0.10.7
-pyspark==2.4.5
+pyspark==2.4.7
 scipy
 setuptools>=41.0.0
 sphinx
diff --git a/screwdriver.yaml b/screwdriver.yaml
new file mode 100644
index 00000000..8ac65398
--- /dev/null
+++ b/screwdriver.yaml
@@ -0,0 +1,53 @@
+# Copyright 2017, Verizon Inc.
+# Licensed under the terms of the apache license. See the LICENSE file in the project root for terms
+
+version: 4
+shared:
+  environment:
+    PACKAGE_DIRECTORY: tensorflowonspark
+    SPARK_HOME: ${SD_ROOT_DIR}/spark
+    TOX_ARGS: '--verbose'
+    TOX_ENVLIST: py37
+  annotations:
+    screwdriver.cd/cpu: HIGH
+    screwdriver.cd/ram: HIGH
+
+jobs:
+  validate_test:
+    template: python/validate_unittest
+    requires: [~commit, ~pr]
+    steps:
+      - prevalidate_code: |
+          source scripts/install_spark.sh
+
+  validate_lint:
+    template: python/validate_lint
+    requires: [~commit, ~pr]
+
+  validate_codestyle:
+    template: python/validate_codestyle
+    requires: [~commit, ~pr]
+
+  validate_safetydb:
+    template: python/validate_safety
+    requires: [~commit, ~pr]
+
+#  validate_security:
+#    template: python/validate_security
+#    requires: [~commit, ~pr]
+
+  publish_test_pypi:
+    template: python/package_python
+    environment:
+      PUBLISH: True
+      TWINE_REPOSITORY_URL: https://test.pypi.org/legacy/
+    requires: [validate_test, validate_lint, validate_codestyle, validate_safetydb, generate_version]
+    steps:
+      - update_version: |
+          echo 'using version from setup.cfg'
+
+#  publish_pypi:
+#    template: python/package_python
+#    environment:
+#      PUBLISH: True
+#      requires: [verify_test_package]
diff --git a/scripts/install_spark.sh b/scripts/install_spark.sh
new file mode 100644
index 00000000..73df157c
--- /dev/null
+++ b/scripts/install_spark.sh
@@ -0,0 +1,11 @@
+#!/bin/bash -x
+
+# Install JDK8
+yum install -y java-1.8.0-openjdk
+export JAVA_HOME=/usr/lib/jvm/jre-1.8.0
+
+# Install Spark
+export SPARK_VERSION=2.4.7
+curl -LO http://www-us.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop2.7.tgz
+mkdir $SPARK_HOME
+tar -xf spark-${SPARK_VERSION}-bin-hadoop2.7.tgz -C $SPARK_HOME --strip-components=1
diff --git a/scripts/start_spark.sh b/scripts/start_spark.sh
new file mode 100644
index 00000000..9160793e
--- /dev/null
+++ b/scripts/start_spark.sh
@@ -0,0 +1,12 @@
+#!/bin/bash -x
+#export SPARK_HOME=/opt/spark
+#export SPARK_LOCAL_IP=127.0.0.1
+#export PATH=$SPARK_HOME/bin:$PATH
+#
+## Start Spark Standalone Cluster
+#export SPARK_CLASSPATH=./lib/tensorflow-hadoop-1.0-SNAPSHOT.jar
+#export MASTER=spark://$(hostname):7077
+#export SPARK_WORKER_INSTANCES=2; export CORES_PER_WORKER=1
+#export TOTAL_CORES=$((${CORES_PER_WORKER}*${SPARK_WORKER_INSTANCES}))
+
+${SPARK_HOME}/sbin/start-master.sh; ${SPARK_HOME}/sbin/start-slave.sh -c ${CORES_PER_WORKER} -m 1G ${MASTER}
diff --git a/scripts/stop_spark.sh b/scripts/stop_spark.sh
new file mode 100755
index 00000000..983e4a1c
--- /dev/null
+++ b/scripts/stop_spark.sh
@@ -0,0 +1,3 @@
+#!/bin/bash -x
+
+${SPARK_HOME}/sbin/stop-slave.sh; ${SPARK_HOME}/sbin/stop-master.sh
diff --git a/scripts/travis_before_install.sh b/scripts/travis_before_install.sh
deleted file mode 100644
index b886234a..00000000
--- a/scripts/travis_before_install.sh
+++ /dev/null
@@ -1,28 +0,0 @@
-#!/bin/bash -x
-
-# TensorFlow 2.0.0 is tested/supported on Ubuntu 16 (xenial) or later
-# But Travis' xenial build env uses JDK11, while Spark requires JDK8
-
-# Install JDK8
-sudo add-apt-repository -y ppa:openjdk-r/ppa
-sudo apt-get update
-sudo apt-get install -y openjdk-8-jdk --no-install-recommends
-sudo update-java-alternatives -s java-1.8.0-openjdk-amd64
-
-# Download and install Spark
-export SPARK_VERSION=2.4.6
-curl -LO http://www-us.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop2.7.tgz
-export SPARK_HOME=./spark
-mkdir $SPARK_HOME
-tar -xf spark-${SPARK_VERSION}-bin-hadoop2.7.tgz -C $SPARK_HOME --strip-components=1
-
-# Setup environment
-export SPARK_LOCAL_IP=127.0.0.1
-export SPARK_CLASSPATH=./lib/tensorflow-hadoop-1.0-SNAPSHOT.jar
-export PATH=$SPARK_HOME/bin:$PATH
-export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
-
-# Update Python
-# Note: TensorFlow 2.0.0 requires pip>19.0
-pip install --upgrade pip
-export PYTHONPATH=$(pwd)
diff --git a/sd.allow b/sd.allow
new file mode 100644
index 00000000..135a5d2d
--- /dev/null
+++ b/sd.allow
@@ -0,0 +1,4 @@
+version: 1
+push:
+  - screwdriver:6384
+  - screwdriver:6384pr
diff --git a/setup.cfg b/setup.cfg
index fe62be21..1cff2608 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,76 @@
+# Copyright 2017, Verizon Inc.
+# Licensed under the terms of the apache license. See the LICENSE file in the project root for terms
 [metadata]
-description-file = README.md
+author = Lee Yang
+author_email = leewyang@gmail.com
+classifiers =
+    Intended Audience :: Developers
+    Intended Audience :: Science/Research
+    License :: OSI Approved :: Apache Software License
+    Topic :: Software Development :: Libraries
+    Programming Language :: Python :: 3 :: Only
+    Programming Language :: Python :: 3.6
+    Programming Language :: Python :: 3.7
+    Programming Language :: Python :: 3.8
+description = Deep learning with TensorFlow on Apache Spark clusters
+license = Apache 2.0
+long_description = file:README.md
+long_description_content_type = text/markdown
+name = tensorflowonspark
+url = https://github.com/yahoo/TensorFlowOnSpark
+version = 2.2.2
+
+[options]
+packages =
+    tensorflowonspark
+
+# The install_requires should include abstract package dependencies
+# here (do not specify specific versions)
+
+install_requires =
+    setuptools>38.0
+
+# By default new packages require at minimum the current supported Python release.
+python_requires = >="3.6"
+zip_safe = True
+
+[options.extras_require]
+# This config section allows you to define optional dependencies. For the general case, the defaults will
+# work fine. So these settings aren't required. However, many of the screwdriver CI Pipeline steps
+# will install the appropriate extras for that step. This makes it possible to install packages that install
+# or enhance the functionality of the CI Pipeline step.
+# Such as packages that implement plugins or themes for the step in question.
+
+# Additional packages for testing (test step)
+# test =
+
+# Additonal packages needed for documentation generation (doc_build/doc_publish steps)
+# If you want to use a sphinx theme from a package, list it here.
+# doc_build =
+
+# Additional packages needed for mypy type checking
+# mypy =
+
+# Additional packages needed for pep8/pycodestyle style checking
+# pycodestyle =
+
+# Additional packages needed for pylint code analysis
+# pylint =
+
+[options.entry_points]
+# Console script entry points are used to create wrapper scripts that run a specific function, the resulting wrapper
+# is installed in the bin directory.
+
+# They are defined using the following format:
+#   scriptname = modulename:function
+# console_scripts =
+#     TFoS=ouroath.TFoS.cli:main
+
+[screwdrivercd.version]
+# Base the autoversion build number on the screwdriver build number
+# This requires the CI Pipeline to have a build step that runs before
+# any packaging steps.
+version_type = sdv4_SD_BUILD
 
 [bdist_wheel]
 universal = 1
diff --git a/setup.py b/setup.py
index 23e8b73e..4c1ccd5f 100644
--- a/setup.py
+++ b/setup.py
@@ -1,29 +1,26 @@
-from setuptools import setup
+#!/usr/bin/env python
+# Copyright 2017, Verizon Inc.
+# Licensed under the terms of the apache license. See the LICENSE file in the project root for terms
+"""
+Package setup file for python module 'tensorflowonspark'
+"""
+import setuptools
+import sys
 
-with open('README.md') as f:
-    long_description = f.read()
 
-setup(
-    name='tensorflowonspark',
-    packages=['tensorflowonspark'],
-    version='2.2.1',
-    description='Deep learning with TensorFlow on Apache Spark clusters',
-    long_description=long_description,
-    long_description_content_type='text/markdown',
-    author='Yahoo, Inc.',
-    url='https://github.com/yahoo/TensorFlowOnSpark',
-    keywords=['tensorflowonspark', 'tensorflow', 'spark', 'machine learning', 'yahoo'],
-    install_requires=['packaging'],
-    license='Apache 2.0',
-    classifiers=[
-        'Intended Audience :: Developers',
-        'Intended Audience :: Science/Research',
-        'License :: OSI Approved :: Apache Software License',
-        'Topic :: Software Development :: Libraries',
-        'Programming Language :: Python :: 2',
-        'Programming Language :: Python :: 2.7',
-        'Programming Language :: Python :: 3',
-        'Programming Language :: Python :: 3.5',
-        'Programming Language :: Python :: 3.6'
-    ]
-)
+def setuptools_version_supported():
+    major, minor, patch = setuptools.__version__.split('.')
+    if int(major) > 38:
+        return True
+    return False
+
+
+if __name__ == '__main__':
+    # Check for a working version of setuptools here because earlier versions did not
+    # support python_requires.
+    if not setuptools_version_supported():
+        print('Setuptools version 38.0.0 or higher is needed to install this package')
+        sys.exit(1)
+
+    # We're being run from the command line so call setup with our arguments
+    setuptools.setup()
diff --git a/tensorflowonspark/__init__.py b/tensorflowonspark/__init__.py
index 04922663..4f6ae751 100644
--- a/tensorflowonspark/__init__.py
+++ b/tensorflowonspark/__init__.py
@@ -2,4 +2,4 @@
 
 logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s (%(threadName)s-%(process)d) %(message)s")
 
-__version__ = "2.2.1"
+__version__ = "2.2.2"
diff --git a/test/README.md b/tests/README.md
similarity index 100%
rename from test/README.md
rename to tests/README.md
diff --git a/test/run_tests.sh b/tests/run_tests.sh
similarity index 100%
rename from test/run_tests.sh
rename to tests/run_tests.sh
diff --git a/test/test.py b/tests/test.py
similarity index 100%
rename from test/test.py
rename to tests/test.py
diff --git a/test/test_TFCluster.py b/tests/test_TFCluster.py
similarity index 100%
rename from test/test_TFCluster.py
rename to tests/test_TFCluster.py
diff --git a/test/test_TFNode.py b/tests/test_TFNode.py
similarity index 100%
rename from test/test_TFNode.py
rename to tests/test_TFNode.py
diff --git a/test/test_TFSparkNode.py b/tests/test_TFSparkNode.py
similarity index 100%
rename from test/test_TFSparkNode.py
rename to tests/test_TFSparkNode.py
diff --git a/test/test_dfutil.py b/tests/test_dfutil.py
similarity index 100%
rename from test/test_dfutil.py
rename to tests/test_dfutil.py
diff --git a/test/test_pipeline.py b/tests/test_pipeline.py
similarity index 100%
rename from test/test_pipeline.py
rename to tests/test_pipeline.py
diff --git a/test/test_reservation.py b/tests/test_reservation.py
similarity index 100%
rename from test/test_reservation.py
rename to tests/test_reservation.py
diff --git a/tox.ini b/tox.ini
new file mode 100644
index 00000000..0d225d3e
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,118 @@
+# Copyright 2017, Verizon Inc.
+# Licensed under the terms of the apache license. See the LICENSE file in the project root for terms
+[config]
+package_dir = tensorflowonspark
+package_name = tensorflowonspark
+
+[tox]
+envlist = py37
+skip_missing_interpreters = true
+
+[testenv]
+allowlist_externals = bash
+changedir = {toxinidir}
+commands =
+    /bin/bash scripts/start_spark.sh
+    python -m unittest discover -s tests
+#    pytest --junitxml=pytest_{envname}.xml -o junit_suite_name={envname} --cov={[config]package_name} --cov-report=xml:coverage.xml --cov-report term-missing tests/
+    /bin/bash scripts/stop_spark.sh
+deps =
+    -rrequirements.txt
+    coverage
+    six
+    pytest
+    pytest-cov
+install_command = {envpython} {envbindir}/pip install {opts} {packages}
+list_dependencies_command = {envpython} {envbindir}/pip freeze
+passenv = SSH_AUTH_SOCK BUILD_NUMBER HOSTNAME SPARK_HOME SPARK_LOCAL_IP MASTER SPARK_WORKER_INSTANCES SPARK_CLASSPATH CORES_PER_WORKER
+setenv =
+    SPARK_LOCAL_IP = 127.0.0.1
+    MASTER = spark://{env:HOSTNAME}:7077
+    SPARK_CLASSPATH = ./lib/tensorflow-hadoop-1.0-SNAPSHOT.jar
+    SPARK_WORKER_INSTANCES = 2
+    CORES_PER_WORKER = 1
+extras = test
+
+[testenv:coverage]
+commands =
+    coverage combine -a
+    coverage report -m --skip-covered
+deps =
+    coverage
+    six
+    pytest
+    pytest-cov
+skip_install = true
+
+[testenv:lint_codestyle]
+deps =
+    six
+    pycodestyle
+commands = {envpython} {envbindir}/pycodestyle {[config]package_dir}
+changedir = {toxinidir}
+install_command = {envpython} {envbindir}/pip install {opts} {packages}
+list_dependencies_command = {envpython} {envbindir}/pip freeze
+passenv = SSH_AUTH_SOCK BUILD_NUMBER
+extras = pep8
+
+[testenv:lint_pylint]
+deps =
+    isort<=4.2.15
+    six
+    pylint
+commands = {envpython} {envbindir}/pylint --output-format=parseable {[config]package_dir}
+changedir = {toxinidir}
+install_command = {envpython} {envbindir}/pip install {opts} {packages}
+list_dependencies_command = {envpython} {envbindir}/pip freeze
+passenv = SSH_AUTH_SOCK BUILD_NUMBER
+extras = pylint
+
+[testenv:lint_mypy]
+deps =
+    mypy
+    lxml
+commands = {envpython} {envbindir}/mypy -p {[config]package_name} --ignore-missing-imports --txt-report artifacts/mypy
+changedir = {toxinidir}
+install_command = {envpython} {envbindir}/pip install {opts} {packages}
+list_dependencies_command = {envpython} {envbindir}/pip freeze
+passenv = SSH_AUTH_SOCK BUILD_NUMBER
+extras = mypy
+
+[testenv:doc_build]
+deps =
+    sphinx!=1.8.0
+    sphinx_rtd_theme
+    guzzle_sphinx_theme
+    recommonmark
+    sphinx_markdown_tables
+commands = {envpython} {envbindir}/sphinx-build -b html doc/source build/sphinx/html
+changedir = {toxinidir}
+install_command = {envpython} {envbindir}/pip install {opts} {packages}
+list_dependencies_command = {envpython} {envbindir}/pip freeze
+passenv = SSH_AUTH_SOCK BUILD_NUMBER
+extras = doc_build
+basepython = python3.6
+
+[testenv:add_api_docs]
+deps =
+    sphinx
+commands = {envpython} {envbindir}/sphinx-apidoc -T -e -M -o doc/source/ src "artifacts/*" "dist/*" "screwdriver/*" "scripts/*" setup.py "tests/*"
+changedir = {toxinidir}
+extras = doc_build
+passenv = SSH_AUTH_SOCK BUILD_NUMBER
+install_command = {envpython} {envbindir}/pip install {opts} {packages}
+list_dependencies_command = {envpython} {envbindir}/pip freeze
+basepython = python3.6
+
+[pycodestyle]
+ignore = E1,E2,E3,E4,E5,W293
+max_line_length = 160

From 06995d3f1ced42f041a3526e44ea869529588a86 Mon Sep 17 00:00:00 2001
From: Lee Yang
Date: Fri, 18 Dec 2020 09:32:38 -0800
Subject: [PATCH 2/3] update README badges; enable publish_pypi

---
 README.md        |  5 ++++-
 screwdriver.yaml | 15 +++++++++------
 2 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index 1da47e10..098d75e7 100644
--- a/README.md
+++ b/README.md
@@ -7,7 +7,10 @@
 Please see LICENSE file in the project root for terms.
 
 > _TensorFlowOnSpark brings scalable deep learning to Apache Hadoop and Apache Spark clusters._
 
-[![Build Status](https://travis-ci.org/yahoo/TensorFlowOnSpark.svg?branch=master)](https://travis-ci.org/yahoo/TensorFlowOnSpark) [![PyPI version](https://badge.fury.io/py/tensorflowonspark.svg)](https://badge.fury.io/py/tensorflowonspark)
+[![Build Status](https://cd.screwdriver.cd/pipelines/6384/badge?nocache=true)](https://cd.screwdriver.cd/pipelines/6384)
+[![Package](https://img.shields.io/badge/package-pypi-blue.svg)](https://pypi.org/project/tensorflowonspark/)
+[![Downloads](https://img.shields.io/pypi/dm/tensorflowonspark.svg)](https://img.shields.io/pypi/dm/tensorflowonspark.svg)
+[![Documentation](https://img.shields.io/badge/Documentation-latest-blue.svg)](https://yahoo.github.io/TensorFlowOnSpark/)
 
 By combining salient features from the [TensorFlow](https://www.tensorflow.org) deep learning framework with [Apache Spark](http://spark.apache.org) and [Apache Hadoop](http://hadoop.apache.org), TensorFlowOnSpark enables distributed deep learning on a cluster of GPU and CPU servers.
diff --git a/screwdriver.yaml b/screwdriver.yaml
index 8ac65398..9cb4e26a 100644
--- a/screwdriver.yaml
+++ b/screwdriver.yaml
@@ -41,13 +41,16 @@ jobs:
     environment:
       PUBLISH: True
      TWINE_REPOSITORY_URL: https://test.pypi.org/legacy/
-    requires: [validate_test, validate_lint, validate_codestyle, validate_safetydb, generate_version]
+    requires: [validate_test, validate_lint, validate_codestyle, validate_safetydb]
     steps:
       - update_version: |
           echo 'using version from setup.cfg'
 
-#  publish_pypi:
-#    template: python/package_python
-#    environment:
-#      PUBLISH: True
-#      requires: [verify_test_package]
+  publish_pypi:
+    template: python/package_python
+    environment:
+      PUBLISH: True
+    requires: [publish_test_pypi]
+    steps:
+      - update_version: |
+          echo 'using version from setup.cfg'

From f566f258416a1f7dde577413a1543631907ecf64 Mon Sep 17 00:00:00 2001
From: Lee Yang
Date: Fri, 18 Dec 2020 10:13:22 -0800
Subject: [PATCH 3/3] remove SD push privs for PRs

---
 sd.allow | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sd.allow b/sd.allow
index 135a5d2d..4227ce43 100644
--- a/sd.allow
+++ b/sd.allow
@@ -1,4 +1,3 @@
 version: 1
 push:
   - screwdriver:6384
-  - screwdriver:6384pr
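
The patches above only configure the Screwdriver jobs; they do not show how the python/validate_unittest template ultimately invokes tox, so the following is a rough local approximation of the validate_test job rather than the exact CI behavior. It assumes a Linux host with yum available and reuses the SPARK_HOME layout, install script, and tox settings introduced in PATCH 1/3 (SD_ROOT_DIR stands in for the Screwdriver workspace):

    # approximate local dry run of the validate_test flow (not part of the patch series)
    export SD_ROOT_DIR="$(pwd)"
    export SPARK_HOME="${SD_ROOT_DIR}/spark"
    export HOSTNAME="$(hostname)"        # tox.ini builds MASTER=spark://{env:HOSTNAME}:7077 from this
    source scripts/install_spark.sh      # installs JDK8 via yum and unpacks Spark 2.4.7 into $SPARK_HOME
    pip install tox
    tox -e py37 --verbose                # TOX_ENVLIST=py37 and TOX_ARGS='--verbose', per screwdriver.yaml

Note that scripts/start_spark.sh and scripts/stop_spark.sh do not need to be run by hand: they are listed in the [testenv] commands, so tox brings the standalone cluster (SPARK_WORKER_INSTANCES=2, one core each) up and down around the unittest run.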
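PATCH 1/3 also carries the 2.2.2 version bump, which now lives in setup.cfg, docs/source/conf.py, tensorflowonspark/__init__.py, and (as 2.2.2-SNAPSHOT) pom.xml, since setup.py no longer declares a version itself. A quick grep like the hypothetical check below, run from the repository root, is one way to confirm those spots stay in sync before tagging a release:

    # hypothetical release sanity check, not part of the patch series
    grep -H "^version = " setup.cfg docs/source/conf.py
    grep -H "^release = " docs/source/conf.py
    grep -H "^__version__" tensorflowonspark/__init__.py
    grep -H "SNAPSHOT" pom.xml    # the Maven artifact tracks the same 2.2.x line with a -SNAPSHOT suffix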