diff --git a/assembly-ext-jython-cfunc/build.gradle b/assembly-ext-jython-cfunc/build.gradle new file mode 100644 index 0000000000..f13197b800 --- /dev/null +++ b/assembly-ext-jython-cfunc/build.gradle @@ -0,0 +1,31 @@ +apply plugin: 'java' +apply plugin: 'com.github.johnrengelman.shadow' +apply plugin: 'java-library' +apply from: "$rootDir/gradle/assembly.gradle" + +description = "Sparkling Water Assembly Jython Extension (for pypi)" + + +dependencies { + api("ai.h2o:h2o-ext-jython-cfunc:${h2oVersion}") +} + +configurations { + jythonShadowApi { + extendsFrom shadowApi + exclude group: "ai.h2o", module: "h2o-core" + } +} + +shadowJar { + configurations = [project.configurations.jythonShadowApi] + mergeServiceFiles() + + archiveBaseName = "${archiveBaseName.get()}_${scalaBaseVersion}" +} + +artifacts { + api shadowJar +} + +build.dependsOn shadowJar diff --git a/assembly-ext-xgboost/build.gradle b/assembly-ext-xgboost/build.gradle new file mode 100644 index 0000000000..b4d8663c70 --- /dev/null +++ b/assembly-ext-xgboost/build.gradle @@ -0,0 +1,34 @@ +apply plugin: 'java' +apply plugin: 'com.github.johnrengelman.shadow' +apply plugin: 'java-library' +apply from: "$rootDir/gradle/assembly.gradle" + +description = "Sparkling Water Assembly XGBoost Extension (for pypi)" + +dependencies { + api("ai.h2o:h2o-ext-xgboost:${h2oVersion}") +} + +configurations { + xgboostShadowApi { + extendsFrom shadowApi + exclude group: "ai.h2o", module: "h2o-core" + exclude group: "ai.h2o", module: "h2o-algos" + exclude group: "ai.h2o", module: "h2o-genmodel" + exclude group: "ai.h2o", module: "h2o-ext-steam" + } +} + +shadowJar { + configurations = [project.configurations.xgboostShadowApi] + mergeServiceFiles() + + relocate 'org.apache.http', 'ai.h2o.org.apache.http' + archiveBaseName = "${archiveBaseName.get()}_${scalaBaseVersion}" +} + +artifacts { + api shadowJar +} + +build.dependsOn shadowJar diff --git a/assembly-slim/build.gradle b/assembly-slim/build.gradle new file mode 100644 
index 0000000000..97b935e01d --- /dev/null +++ b/assembly-slim/build.gradle @@ -0,0 +1,45 @@ +apply plugin: 'java' +apply plugin: 'com.github.johnrengelman.shadow' +apply plugin: 'java-library' +apply from: "$rootDir/gradle/assembly.gradle" + +description = "Sparkling Water Assembly Slim (for pypi)" + +dependencies { + api(project(":sparkling-water-ml")) + api(project(":sparkling-water-repl")) + api(project(":sparkling-water-core")) + api(project(":sparkling-water-extensions")) +} + +configurations { + slimShadowApi { + extendsFrom shadowApi + exclude group: "ai.h2o", module: "h2o-ext-jython-cfunc" + exclude group: "ai.h2o", module: "h2o-ext-xgboost" + } +} + +shadowJar { + configurations = [project.configurations.slimShadowApi] + mergeServiceFiles() + + relocate 'javassist', 'ai.h2o.javassist' + relocate 'com.google.common', 'ai.h2o.com.google.common' + relocate 'org.eclipse.jetty', 'ai.h2o.org.eclipse.jetty' + relocate 'org.eclipse.jetty.orbit', 'ai.h2o.org.eclipse.jetty.orbit' + relocate 'scala.compat.java8', 'ai.h2o.scala.compat.java8' + relocate 'scala.concurrent.java8', 'ai.h2o.scala.concurrent.java8' + relocate 'com.amazonaws', 'ai.h2o.com.amazonaws' + relocate 'org.apache.http', 'ai.h2o.org.apache.http' + from "$project.buildDir/reports/" include '**/*' + exclude 'www/flow/packs/test-*/**' + + archiveBaseName = "${archiveBaseName.get()}_${scalaBaseVersion}" +} + +artifacts { + api shadowJar +} + +build.dependsOn shadowJar diff --git a/assembly/build.gradle b/assembly/build.gradle index 235bd86bb0..2c54f8deb7 100644 --- a/assembly/build.gradle +++ b/assembly/build.gradle @@ -2,6 +2,7 @@ apply plugin: 'java' apply plugin: 'com.github.johnrengelman.shadow' apply plugin: 'com.github.jk1.dependency-license-report' apply plugin: 'java-library' +apply from: "$rootDir/gradle/assembly.gradle" import com.github.jk1.license.filter.* import com.github.jk1.license.render.* @@ -16,41 +17,6 @@ dependencies { api(project(":sparkling-water-extensions")) } -// -// Contains 
runtime configuration for Sparkling Water fat jar. This configuration transitively removes dependencies provided by -// Apache Spark or the Hadoop ecosystem. -// -configurations { - shadowApi { - extendsFrom api - exclude group: 'org.apache.spark' - exclude group: 'org.apache.hadoop' - exclude group: 'org.scala-lang' - exclude group: "javax.servlet", module: "servlet-api" - exclude group: 'org.apache.commons', module: 'commons-math3' // a dependency of org.apache.spark:spark-core_2.11 - exclude group: 'org.codehaus.jackson', module: 'jackson-core-asl' - // a dependency of org.apache.spark:spark-core_2.11 - exclude group: 'commons-codec', module: 'commons-codec' // a dependency of org.apache.spark:spark-sql_2.11 - exclude group: 'commons-lang', module: 'commons-lang' // a dependency of org.apache.spark:spark-core_2.11 - exclude group: 'commons-io', module: 'commons-io' // a dependency of org.apache.spark:spark-core_2.11 - exclude group: 'commons-logging', module: 'commons-logging' // a dependency of org.apache.hadoop:hadoop-auth - exclude group: 'log4j', module: 'log4j' // a dependency of org.apache.hadoop:hadoop-auth - exclude group: 'com.google.protobuf' // a dependency of org.apache.hadoop:hadoop-common - exclude group: 'com.fasterxml.jackson.core', module: 'jackson-core' - // a dependency of org.apache.spark:spark-sql_2.11 - exclude group: 'com.github.rwl', module: 'jtransforms' // a dependency of org.apache.spark:spark-mllib - exclude group: 'com.google.code.findbugs', module: 'jsr305' // a dependency of org.apache.hadoop:hadoop-common - exclude group: 'javax.xml.bind', module: 'jaxb-api' // a dependency of org.apache.hadoop:hadoop-yarn-common - exclude group: 'net.sourceforge.f2j', module: 'arpack_combined_all' - // a dependency of org.apache.spark:spark-graphx_2.11 - } -} - -jar { - enabled = false // we do not need to generate jar file in this case - archiveBaseName = "${jar.archiveBaseName.get()}_${scalaBaseVersion}" -} - shadowJar { configurations = 
[project.configurations.shadowApi] mergeServiceFiles() diff --git a/build.gradle b/build.gradle index fb726c9229..9b8181e10c 100644 --- a/build.gradle +++ b/build.gradle @@ -75,7 +75,12 @@ ext { project(':sparkling-water-doc'), project(':sparkling-water-booklet') ] - pythonProjects = [project(':sparkling-water-py'), project(':sparkling-water-py-scoring')] + pythonProjects = [ + project(':sparkling-water-py'), + project(':sparkling-water-py-ext-jython-cfunc'), + project(':sparkling-water-py-ext-xgboost'), + project(':sparkling-water-py-scoring') + ] rProjects = [project(':sparkling-water-r'), project(':sparkling-water-r-cran')] docProjects = [project(':sparkling-water-doc')] diff --git a/gradle/assembly.gradle b/gradle/assembly.gradle new file mode 100644 index 0000000000..f0751ad4d2 --- /dev/null +++ b/gradle/assembly.gradle @@ -0,0 +1,34 @@ +jar { + enabled = false // we do not need to generate jar file in this case + archiveBaseName = "${jar.archiveBaseName.get()}_${scalaBaseVersion}" +} + +// +// Contains runtime configuration for Sparkling Water fat jar. This configuration transitively removes dependencies provided by +// Apache Spark or the Hadoop ecosystem. 
+// +configurations { + shadowApi { + extendsFrom api + exclude group: 'org.apache.spark' + exclude group: 'org.apache.hadoop' + exclude group: 'org.scala-lang' + exclude group: "javax.servlet", module: "servlet-api" + exclude group: 'org.apache.commons', module: 'commons-math3' // a dependency of org.apache.spark:spark-core_2.11 + exclude group: 'org.codehaus.jackson', module: 'jackson-core-asl' + // a dependency of org.apache.spark:spark-core_2.11 + exclude group: 'commons-codec', module: 'commons-codec' // a dependency of org.apache.spark:spark-sql_2.11 + exclude group: 'commons-lang', module: 'commons-lang' // a dependency of org.apache.spark:spark-core_2.11 + exclude group: 'commons-io', module: 'commons-io' // a dependency of org.apache.spark:spark-core_2.11 + exclude group: 'commons-logging', module: 'commons-logging' // a dependency of org.apache.hadoop:hadoop-auth + exclude group: 'log4j', module: 'log4j' // a dependency of org.apache.hadoop:hadoop-auth + exclude group: 'com.google.protobuf' // a dependency of org.apache.hadoop:hadoop-common + exclude group: 'com.fasterxml.jackson.core', module: 'jackson-core' + // a dependency of org.apache.spark:spark-sql_2.11 + exclude group: 'com.github.rwl', module: 'jtransforms' // a dependency of org.apache.spark:spark-mllib + exclude group: 'com.google.code.findbugs', module: 'jsr305' // a dependency of org.apache.hadoop:hadoop-common + exclude group: 'javax.xml.bind', module: 'jaxb-api' // a dependency of org.apache.hadoop:hadoop-yarn-common + exclude group: 'net.sourceforge.f2j', module: 'arpack_combined_all' + // a dependency of org.apache.spark:spark-graphx_2.11 + } +} \ No newline at end of file diff --git a/py-ext-jython-cfunc/README.rst b/py-ext-jython-cfunc/README.rst new file mode 100644 index 0000000000..64bb0d62af --- /dev/null +++ b/py-ext-jython-cfunc/README.rst @@ -0,0 +1,21 @@ +PySparkling Jython-cfunc Extension +================================== + +This package contains Jython library and other 
dependencies needed for execution of custom metrics. + +|Join the chat at https://gitter.im/h2oai/sparkling-water| |License| |Powered by H2O.ai| + +PySparkling Documentation is hosted at our documentation page: + +- For Spark 3.1 - http://docs.h2o.ai/sparkling-water/3.1/latest-stable/doc/pysparkling.html +- For Spark 3.0 - http://docs.h2o.ai/sparkling-water/3.0/latest-stable/doc/pysparkling.html +- For Spark 2.4 - http://docs.h2o.ai/sparkling-water/2.4/latest-stable/doc/pysparkling.html +- For Spark 2.3 - http://docs.h2o.ai/sparkling-water/2.3/latest-stable/doc/pysparkling.html +- For Spark 2.2 - http://docs.h2o.ai/sparkling-water/2.2/latest-stable/doc/pysparkling.html + +.. |Join the chat at https://gitter.im/h2oai/sparkling-water| image:: https://badges.gitter.im/Join%20Chat.svg + :target: https://gitter.im/h2oai/sparkling-water?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge +.. |License| image:: https://img.shields.io/badge/License-Apache%202-blue.svg + :target: LICENSE +.. 
|Powered by H2O.ai| image:: https://img.shields.io/badge/powered%20by-h2oai-yellow.svg + :target: https://github.com/h2oai/ diff --git a/py-ext-jython-cfunc/build.gradle b/py-ext-jython-cfunc/build.gradle new file mode 100644 index 0000000000..2cbe1f82a4 --- /dev/null +++ b/py-ext-jython-cfunc/build.gradle @@ -0,0 +1,133 @@ +description = "PySparklingScoring - Sparkling Water Python Scoring Package" + +apply from: "$rootDir/gradle/utils.gradle" +apply plugin: 'ru.vyarus.use-python' + +import ru.vyarus.gradle.plugin.python.cmd.Python + +def getPythonVersion() { + Python p = new Python(project, python.getPythonPath(), python.getPythonBinary()) + return p.version +} + +ext { + FS = File.separator + FPS = File.pathSeparator + pythonBinary = findProperty("pythonBinary") ?: "python" + pkgDir = file("$buildDir/pkg") + distDir = file("$buildDir/dist") + condaDir = file("$buildDir/conda/h2o_pysparkling_ext_jython_cfunc") +} + +// Define the environment required to run tests +python { + if (project.hasProperty("pythonBinary")) { + pythonBinary project.findProperty("pythonBinary").toString() + } + if (project.hasProperty("pythonPath")) { + pythonPath project.findProperty("pythonPath").toString() + } + + if (project.hasProperty("pythonEnvBasePath")) { + // for CI as we use pre-cached environment + envPath "${project.findProperty("pythonEnvBasePath")}/${getPythonVersion()}/${sparkVersion}" + } else { + envPath "${rootDir}/.gradle/python/${getPythonVersion()}/${sparkVersion}" + } +} + +configurations { + sparklingWaterAssemblyJar +} + +dependencies { + sparklingWaterAssemblyJar project(path: ':sparkling-water-assembly-ext-jython-cfunc', configuration: 'shadow') +} + +task createVersionFile { + doLast { + def versionFileDir = new File(pkgDir, "sparkling_water_ext_jython_cfunc") + if (!versionFileDir.exists()) { + versionFileDir.mkdirs() + } + File version_file = new File(versionFileDir, "version.txt") + + def version_txt = version + version_file.write(version_txt) + } +} + +def 
copyPySetup() { + copy { + from("$projectDir/src") { + include 'setup.py' + } + filter { + it.replaceAll("SUBST_SW_VERSION", version.substring(0, version.lastIndexOf("-")).replace("-", "_")) + .replaceAll("SUBST_PYTHON_VERSIONS", convertSupportedPythonVersionsToPyPiDefinitions(pythonEnvironments)) + } + into pkgDir + } + + copy { + from("$projectDir") { + include 'README.rst' + } + into pkgDir + } + + copy { + from("$projectDir/src") { + include 'MANIFEST.in' + include 'setup.cfg' + exclude '**/*.pyc' + } + into pkgDir + } + + copy { + from("$projectDir/conda/h2o_pysparkling_ext_jython_cfunc") { + include 'bld.bat' + include 'build.sh' + } + into condaDir + } + + copy { + from("$projectDir/conda/h2o_pysparkling_ext_jython_cfunc") { + include 'meta.yaml' + } + filter { + it.replaceAll("SUBST_SW_VERSION", version.substring(0, version.lastIndexOf("-")).replace("-", "_")) + } + into condaDir + } +} + +task createPkg(dependsOn: configurations.sparklingWaterAssemblyJar) { + copyPySetup() + // Copy sparkling water assembly jar + def fatJar = configurations.sparklingWaterAssemblyJar.singleFile + copy { + from fatJar + into file("${project.pkgDir}/sparkling_water_ext_jython_cfunc") + rename ".*", "sparkling_water_ext_jython_cfunc_assembly.jar" + } + // Save comment into module file + file("${project.pkgDir}/sparkling_water_ext_jython_cfunc/").mkdir() + file("${project.pkgDir}/sparkling_water_ext_jython_cfunc/__init__.py").write("# Sparkling-water JAR holder for pySparklingExtJythonCfunc module.") +} + +// +// Cleanup +// +task cleanPython(type: Delete) { + delete getBuildDir() +} + +// +// Setup execution graph +// +clean.dependsOn cleanPython +createPkg.dependsOn createVersionFile +build.dependsOn createPkg diff --git a/py-ext-jython-cfunc/conda/h2o_pysparkling_ext_jython_cfunc/bld.bat b/py-ext-jython-cfunc/conda/h2o_pysparkling_ext_jython_cfunc/bld.bat new file mode 100644 index 0000000000..ad95092139 --- /dev/null +++ 
b/py-ext-jython-cfunc/conda/h2o_pysparkling_ext_jython_cfunc/bld.bat @@ -0,0 +1,2 @@ +%PYTHON% setup.py install --single-version-externally-managed --record=record.txt +if errorlevel 1 exit 1 diff --git a/py-ext-jython-cfunc/conda/h2o_pysparkling_ext_jython_cfunc/build.sh b/py-ext-jython-cfunc/conda/h2o_pysparkling_ext_jython_cfunc/build.sh new file mode 100644 index 0000000000..c3392ee62b --- /dev/null +++ b/py-ext-jython-cfunc/conda/h2o_pysparkling_ext_jython_cfunc/build.sh @@ -0,0 +1,2 @@ +#/bin/bash +$PYTHON setup.py install --single-version-externally-managed --record=record.txt diff --git a/py-ext-jython-cfunc/conda/h2o_pysparkling_ext_jython_cfunc/meta.yaml b/py-ext-jython-cfunc/conda/h2o_pysparkling_ext_jython_cfunc/meta.yaml new file mode 100644 index 0000000000..2dbaf2614e --- /dev/null +++ b/py-ext-jython-cfunc/conda/h2o_pysparkling_ext_jython_cfunc/meta.yaml @@ -0,0 +1,20 @@ +package: + name: h2o_pysparkling_ext_jython_cfunc + version: SUBST_SW_VERSION + +source: + path: ../../pkg + +requirements: + build: + - python + - pip >=9.0.1 + - setuptools + + run: + - python + +about: + home: https://github.com/h2oai/sparkling-water.git + license: Apache License Version 2.0 + license_family: Apache diff --git a/py-ext-jython-cfunc/src/setup.cfg b/py-ext-jython-cfunc/src/setup.cfg new file mode 100644 index 0000000000..3c53ece0a9 --- /dev/null +++ b/py-ext-jython-cfunc/src/setup.cfg @@ -0,0 +1,7 @@ +[bdist_wheel] +# This flag says that the code is written to work on both Python 2 and Python +# 3. +# I.e.: +# 1. Your project runs on Python 2 and 3 with no changes (i.e. it does not require 2to3). +# 2. Your project does not have any C extensions. 
+universal=1 diff --git a/py-ext-jython-cfunc/src/setup.py b/py-ext-jython-cfunc/src/setup.py new file mode 100644 index 0000000000..01736681fb --- /dev/null +++ b/py-ext-jython-cfunc/src/setup.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python + +from codecs import open +from os import path +from setuptools import setup, find_packages + +here = path.abspath(path.dirname(__file__)) + +# Get the long description from the relevant file +with open(path.join(here, 'README.rst'), encoding='utf-8') as f: + long_description = f.read() + +setup( + name='h2o_pysparkling_ext_jython_cfunc', + + # Versions should comply with PEP440. For a discussion on single-sourcing + # the version across setup.py and the project code, see + # https://packaging.python.org/en/latest/single_source_version.html + version="SUBST_SW_VERSION", + description='Sparkling Water integrates H2O\'s Fast Scalable Machine Learning with Spark', + long_description=long_description, + + url='https://github.com/h2oai/sparkling-water', + download_url='https://github.com/h2oai/sparkling-water/', + author='H2O.ai', + author_email='support@h2o.ai', + license='Apache v2', + classifiers=[ + 'Development Status :: 5 - Production/Stable', + 'Intended Audience :: Developers', + 'Topic :: Software Development :: Build Tools', + 'License :: OSI Approved :: Apache Software License', +SUBST_PYTHON_VERSIONS + ], + keywords='machine learning, data mining, statistical analysis, modeling, big data, distributed, parallel', + + # find python packages starting in the current directory + packages=find_packages(), + + # run-time dependencies + install_requires=[], + + # bundled binary packages + package_data={'sparkling_water_ext_jython_cfunc': ['*.jar', 'version.txt']}, +) diff --git a/py-ext-xgboost/README.rst b/py-ext-xgboost/README.rst new file mode 100644 index 0000000000..31cc88ecf0 --- /dev/null +++ b/py-ext-xgboost/README.rst @@ -0,0 +1,21 @@ +PySparkling XGBoost Extension +============================= + +This package contains 
libraries needed for training and scoring with the XGBoost algorithm. + +|Join the chat at https://gitter.im/h2oai/sparkling-water| |License| |Powered by H2O.ai| + +PySparkling Documentation is hosted at our documentation page: + +- For Spark 3.1 - http://docs.h2o.ai/sparkling-water/3.1/latest-stable/doc/pysparkling.html +- For Spark 3.0 - http://docs.h2o.ai/sparkling-water/3.0/latest-stable/doc/pysparkling.html +- For Spark 2.4 - http://docs.h2o.ai/sparkling-water/2.4/latest-stable/doc/pysparkling.html +- For Spark 2.3 - http://docs.h2o.ai/sparkling-water/2.3/latest-stable/doc/pysparkling.html +- For Spark 2.2 - http://docs.h2o.ai/sparkling-water/2.2/latest-stable/doc/pysparkling.html + +.. |Join the chat at https://gitter.im/h2oai/sparkling-water| image:: https://badges.gitter.im/Join%20Chat.svg + :target: https://gitter.im/h2oai/sparkling-water?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge +.. |License| image:: https://img.shields.io/badge/License-Apache%202-blue.svg + :target: LICENSE +.. 
|Powered by H2O.ai| image:: https://img.shields.io/badge/powered%20by-h2oai-yellow.svg + :target: https://github.com/h2oai/ diff --git a/py-ext-xgboost/build.gradle b/py-ext-xgboost/build.gradle new file mode 100644 index 0000000000..015bc73e64 --- /dev/null +++ b/py-ext-xgboost/build.gradle @@ -0,0 +1,132 @@ +description = "PySparklingScoring - Sparkling Water Python Scoring Package" + +apply from: "$rootDir/gradle/utils.gradle" +apply plugin: 'ru.vyarus.use-python' + +import ru.vyarus.gradle.plugin.python.cmd.Python + +def getPythonVersion() { + Python p = new Python(project, python.getPythonPath(), python.getPythonBinary()) + return p.version +} + +ext { + FS = File.separator + FPS = File.pathSeparator + pythonBinary = findProperty("pythonBinary") ?: "python" + pkgDir = file("$buildDir/pkg") + condaDir = file("$buildDir/conda/h2o_pysparkling_ext_xgboost") +} + +// Define the environment required to run tests +python { + if (project.hasProperty("pythonBinary")) { + pythonBinary project.findProperty("pythonBinary").toString() + } + if (project.hasProperty("pythonPath")) { + pythonPath project.findProperty("pythonPath").toString() + } + + if (project.hasProperty("pythonEnvBasePath")) { + // for CI as we use pre-cached environment + envPath "${project.findProperty("pythonEnvBasePath")}/${getPythonVersion()}/${sparkVersion}" + } else { + envPath "${rootDir}/.gradle/python/${getPythonVersion()}/${sparkVersion}" + } +} + +configurations { + sparklingWaterAssemblyJar +} + +dependencies { + sparklingWaterAssemblyJar project(path: ':sparkling-water-assembly-ext-xgboost', configuration: 'shadow') +} + +task createVersionFile { + doLast { + def versionFileDir = new File(pkgDir, "sparkling_water_ext_xgboost") + if (!versionFileDir.exists()) { + versionFileDir.mkdirs() + } + File version_file = new File(versionFileDir, "version.txt") + + def version_txt = version + version_file.write(version_txt) + } +} + +def copyPySetup() { + copy { + from("$projectDir/src") { + include 
'setup.py' + } + filter { + it.replaceAll("SUBST_SW_VERSION", version.substring(0, version.lastIndexOf("-")).replace("-", "_")) + .replaceAll("SUBST_PYTHON_VERSIONS", convertSupportedPythonVersionsToPyPiDefinitions(pythonEnvironments)) + } + into pkgDir + } + + copy { + from("$projectDir") { + include 'README.rst' + } + into pkgDir + } + + copy { + from("$projectDir/src") { + include 'MANIFEST.in' + include 'setup.cfg' + exclude '**/*.pyc' + } + into pkgDir + } + + copy { + from("$projectDir/conda/h2o_pysparkling_ext_xgboost") { + include 'bld.bat' + include 'build.sh' + } + into condaDir + } + + copy { + from("$projectDir/conda/h2o_pysparkling_ext_xgboost") { + include 'meta.yaml' + } + filter { + it.replaceAll("SUBST_SW_VERSION", version.substring(0, version.lastIndexOf("-")).replace("-", "_")) + } + into condaDir + } +} + +task createPkg(dependsOn: configurations.sparklingWaterAssemblyJar) { + copyPySetup() + // Copy sparkling water assembly jar + def fatJar = configurations.sparklingWaterAssemblyJar.singleFile + copy { + from fatJar + into file("${project.pkgDir}/sparkling_water_ext_xgboost") + rename ".*", "sparkling_water_ext_xgboost_assembly.jar" + } + // Save comment into module file + file("${project.pkgDir}/sparkling_water_ext_xgboost/").mkdir() + file("${project.pkgDir}/sparkling_water_ext_xgboost/__init__.py").write("# Sparkling-water JAR holder for pySparklingExtXgboost module.") +} + +// +// Cleanup +// +task cleanPython(type: Delete) { + delete getBuildDir() +} + +// +// Setup execution graph +// +clean.dependsOn cleanPython +createPkg.dependsOn createVersionFile +build.dependsOn createPkg diff --git a/py-ext-xgboost/conda/h2o_pysparkling_ext_xgboost/bld.bat b/py-ext-xgboost/conda/h2o_pysparkling_ext_xgboost/bld.bat new file mode 100644 index 0000000000..ad95092139 --- /dev/null +++ b/py-ext-xgboost/conda/h2o_pysparkling_ext_xgboost/bld.bat @@ -0,0 +1,2 @@ +%PYTHON% setup.py install --single-version-externally-managed --record=record.txt +if 
errorlevel 1 exit 1 diff --git a/py-ext-xgboost/conda/h2o_pysparkling_ext_xgboost/build.sh b/py-ext-xgboost/conda/h2o_pysparkling_ext_xgboost/build.sh new file mode 100644 index 0000000000..c3392ee62b --- /dev/null +++ b/py-ext-xgboost/conda/h2o_pysparkling_ext_xgboost/build.sh @@ -0,0 +1,2 @@ +#/bin/bash +$PYTHON setup.py install --single-version-externally-managed --record=record.txt diff --git a/py-ext-xgboost/conda/h2o_pysparkling_ext_xgboost/meta.yaml b/py-ext-xgboost/conda/h2o_pysparkling_ext_xgboost/meta.yaml new file mode 100644 index 0000000000..b2db79fa8d --- /dev/null +++ b/py-ext-xgboost/conda/h2o_pysparkling_ext_xgboost/meta.yaml @@ -0,0 +1,20 @@ +package: + name: h2o_pysparkling_ext_xgboost + version: SUBST_SW_VERSION + +source: + path: ../../pkg + +requirements: + build: + - python + - pip >=9.0.1 + - setuptools + + run: + - python + +about: + home: https://github.com/h2oai/sparkling-water.git + license: Apache License Version 2.0 + license_family: Apache diff --git a/py-ext-xgboost/src/setup.cfg b/py-ext-xgboost/src/setup.cfg new file mode 100644 index 0000000000..3c53ece0a9 --- /dev/null +++ b/py-ext-xgboost/src/setup.cfg @@ -0,0 +1,7 @@ +[bdist_wheel] +# This flag says that the code is written to work on both Python 2 and Python +# 3. +# I.e.: +# 1. Your project runs on Python 2 and 3 with no changes (i.e. it does not require 2to3). +# 2. Your project does not have any C extensions. 
+universal=1 diff --git a/py-ext-xgboost/src/setup.py b/py-ext-xgboost/src/setup.py new file mode 100644 index 0000000000..a2a27a93c7 --- /dev/null +++ b/py-ext-xgboost/src/setup.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python + +from codecs import open +from os import path +from setuptools import setup, find_packages + +here = path.abspath(path.dirname(__file__)) + +# Get the long description from the relevant file +with open(path.join(here, 'README.rst'), encoding='utf-8') as f: + long_description = f.read() + +setup( + name='h2o_pysparkling_ext_xgboost', + + # Versions should comply with PEP440. For a discussion on single-sourcing + # the version across setup.py and the project code, see + # https://packaging.python.org/en/latest/single_source_version.html + version="SUBST_SW_VERSION", + description='Sparkling Water integrates H2O\'s Fast Scalable Machine Learning with Spark', + long_description=long_description, + + url='https://github.com/h2oai/sparkling-water', + download_url='https://github.com/h2oai/sparkling-water/', + author='H2O.ai', + author_email='support@h2o.ai', + license='Apache v2', + classifiers=[ + 'Development Status :: 5 - Production/Stable', + 'Intended Audience :: Developers', + 'Topic :: Software Development :: Build Tools', + 'License :: OSI Approved :: Apache Software License', +SUBST_PYTHON_VERSIONS + ], + keywords='machine learning, data mining, statistical analysis, modeling, big data, distributed, parallel', + + # find python packages starting in the current directory + packages=find_packages(), + + # run-time dependencies + install_requires=[], + + # bundled binary packages + package_data={'sparkling_water_ext_xgboost': ['*.jar', 'version.txt']}, +) diff --git a/py-scoring/src/MANIFEST.in b/py-scoring/src/MANIFEST.in index d9038b21fa..bb4837c489 100644 --- a/py-scoring/src/MANIFEST.in +++ b/py-scoring/src/MANIFEST.in @@ -1,4 +1,4 @@ # include additional files which are not fetched by default by setuptools # We need to manually specify 
version files for h2o and sparkling water to make sdist and pip happy -include ai/h2o/sparkling/version.txt +include sparkling_water_ext_xgboost/version.txt diff --git a/py-scoring/src/ai/h2o/sparkling/BackingJar.py b/py-scoring/src/ai/h2o/sparkling/BackingJar.py index 5880212218..88ecbaaa51 100644 --- a/py-scoring/src/ai/h2o/sparkling/BackingJar.py +++ b/py-scoring/src/ai/h2o/sparkling/BackingJar.py @@ -25,3 +25,27 @@ def getName(): @staticmethod def getRelativePath(): return "sparkling_water/" + BackingJar.getName() + + +class BackingJar(object): + + def __init__(self, name, module): + self._name = name + self._module = module + + def getName(self): + return self._name + + def getModule(self): + return self._module + + def getRelativePath(self): + return self._module + "/" + self._name + + @staticmethod + def getMainBackingJar(): + return BackingJar.getBackingJars()[0] + + @staticmethod + def getBackingJars(): + return [BackingJar(name="sparkling_water_scoring_assembly.jar", module="sparkling_water"),] diff --git a/py-scoring/src/ai/h2o/sparkling/Initializer.py b/py-scoring/src/ai/h2o/sparkling/Initializer.py index f6ef4f386b..97bc76225c 100644 --- a/py-scoring/src/ai/h2o/sparkling/Initializer.py +++ b/py-scoring/src/ai/h2o/sparkling/Initializer.py @@ -49,15 +49,24 @@ class Initializer(object): def __setUpPySparkSubmitArgs(): # Ensure that when we do import pysparkling, spark will put later the JAR file # to the driver. This option has effect only when SparkContext has not been started before. 
+ swjars = "" + delimiter = False + for backingJar in BackingJar.getBackingJars(): + if delimiter: + swjars = swjars + "," + else: + delimiter = True + swjars = swjars + Initializer.__get_sw_jar(None, backingJar) + if os.environ.get('PYSPARK_SUBMIT_ARGS') is None: - os.environ["PYSPARK_SUBMIT_ARGS"] = "--jars " + Initializer.__get_sw_jar(None) + " pyspark-shell" + os.environ["PYSPARK_SUBMIT_ARGS"] = "--jars " + swjars + " pyspark-shell" else: value = os.environ.get('PYSPARK_SUBMIT_ARGS') if "--jars" not in value: - os.environ["PYSPARK_SUBMIT_ARGS"] = "--jars " + Initializer.__get_sw_jar(None) + " " + value + os.environ["PYSPARK_SUBMIT_ARGS"] = "--jars " + swjars + " " + value else: pos = re.search("--jars\\s+", value).end() - os.environ["PYSPARK_SUBMIT_ARGS"] = value[:pos] + Initializer.__get_sw_jar(None) + "," + value[pos:] + os.environ["PYSPARK_SUBMIT_ARGS"] = value[:pos] + swjars + "," + value[pos:] @staticmethod def load_sparkling_jar(): @@ -70,7 +79,8 @@ def load_sparkling_jar(): stream = jvm.Thread.currentThread().getContextClassLoader().getResourceAsStream("sw.version") if stream is None: sys.path.append(".") - Initializer.__add_sparkling_jar_to_spark(sc) + for backingJar in BackingJar.getBackingJars(): + Initializer.__add_sparkling_jar_to_spark(sc, backingJar) else: otherVersion = jvm.scala.io.Source.fromInputStream(stream, "UTF-8").mkString() currentVersion = Initializer.getVersion() @@ -83,10 +93,10 @@ def load_sparkling_jar(): Initializer.__sparklingWaterJarLoaded = True @staticmethod - def __add_sparkling_jar_to_spark(sc): + def __add_sparkling_jar_to_spark(sc, backingJar): gateway = sc._gateway # Add Sparkling water assembly JAR to driver - sw_jar_file = Initializer.__get_sw_jar(sc) + sw_jar_file = Initializer.__get_sw_jar(sc, backingJar) # SW-593 - adding an extra / to fix a windows shell issue creating malform url if not sw_jar_file.startswith('/'): @@ -105,25 +115,25 @@ def __removeTmpDir(): shutil.rmtree(Initializer.__extracted_jar_dir) 
@staticmethod - def __extracted_jar_path(sc): + def __extracted_jar_path(sc, backingJar): if Initializer.__extracted_jar_dir is None: - zip_file = Initializer.__get_pysparkling_package_path() + zip_file = Initializer.__get_pysparkling_package_path(backingJar) if sc is None: Initializer.__extracted_jar_dir = tempfile.mkdtemp() atexit.register(Initializer.__removeTmpDir) else: Initializer.__extracted_jar_dir = sc._temp_dir with zipfile.ZipFile(zip_file) as fzip: - fzip.extract(BackingJar.getRelativePath(), path=Initializer.__extracted_jar_dir) + fzip.extract(backingJar.getRelativePath(), path=Initializer.__extracted_jar_dir) - return os.path.abspath("{}/{}".format(Initializer.__extracted_jar_dir, BackingJar.getRelativePath())) + return os.path.abspath("{}/{}".format(Initializer.__extracted_jar_dir, backingJar.getRelativePath())) @staticmethod - def __get_pysparkling_package_path(): - import sparkling_water - sw_pkg_file = sparkling_water.__file__ - return sw_pkg_file[:-len('/sparkling_water/__init__.py')] + def __get_pysparkling_package_path(backingJar): + module = __import__(backingJar.getModule()) + pkg_file = module.__file__ + return pkg_file[:-len('/' + backingJar.getModule() + '/__init__.py')] @staticmethod def check_different_h2o(): @@ -136,7 +146,7 @@ def check_different_h2o(): try: import h2o sw_h2o_version = h2o.__version__ - zip_file_name = os.path.basename(Initializer.__get_pysparkling_package_path()) + zip_file_name = os.path.basename(Initializer.__get_pysparkling_package_path(BackingJar.getMainBackingJar())) path_without_sw = [i for i in sys.path if os.path.basename(i) != zip_file_name] command_sys_path = "import sys; sys.path = " + str(path_without_sw).replace("'", "\"") + ";" command_import_h2o = "import h2o; print(h2o.__version__)" @@ -153,14 +163,14 @@ def check_different_h2o(): pass @staticmethod - def __get_sw_jar(sc): - packagePath = Initializer.__get_pysparkling_package_path() + def __get_sw_jar(sc, backingJar): + packagePath = 
Initializer.__get_pysparkling_package_path(backingJar) # Extract jar file from zip if zipfile.is_zipfile(packagePath): - return Initializer.__extracted_jar_path(sc) + return Initializer.__extracted_jar_path(sc, backingJar) else: from pkg_resources import resource_filename - return os.path.abspath(resource_filename("sparkling_water", BackingJar.getName())) + return os.path.abspath(resource_filename(backingJar.getModule(), backingJar.getName())) @staticmethod def __get_logger(jvm): @@ -201,7 +211,7 @@ def isRunningViaDBCConnect(): @staticmethod def getVersion(): - packagePath = Initializer.__get_pysparkling_package_path() + packagePath = Initializer.__get_pysparkling_package_path(BackingJar.getMainBackingJar()) versionFile = 'ai/h2o/sparkling/version.txt' if zipfile.is_zipfile(packagePath): with zipfile.ZipFile(packagePath, 'r') as archive: diff --git a/py/build.gradle b/py/build.gradle index 5428f0189b..d2626945be 100644 --- a/py/build.gradle +++ b/py/build.gradle @@ -15,6 +15,7 @@ ext { FPS = File.pathSeparator pythonBinary = findProperty("pythonBinary") ?: "python" pkgDir = file("$buildDir/pkg") + distSourceDir = file("$buildDir/distSource") distDir = file("$buildDir/dist") condaDir = file("$buildDir/conda/h2o_pysparkling_${sparkMajorVersion}") wheelFile = file("${rootDir}/.gradle/wheels/h2o-${h2oMajorVersion}.${h2oBuild}-py2.py3-none-any.whl") @@ -47,12 +48,16 @@ python { } configurations { - sparklingWaterAssemblyJar + sparklingWaterSlimAssemblyJar + sparklingWaterXgboostAssemblyJar + sparklingWaterJythonAssemblyJar apiGeneration } dependencies { - sparklingWaterAssemblyJar project(path: ':sparkling-water-assembly', configuration: 'shadow') + sparklingWaterSlimAssemblyJar project(path: ':sparkling-water-assembly-slim', configuration: 'shadow') + sparklingWaterXgboostAssemblyJar project(path: ':sparkling-water-assembly-ext-xgboost', configuration: 'shadow') + sparklingWaterJythonAssemblyJar project(path: ':sparkling-water-assembly-ext-jython-cfunc', 
configuration: 'shadow') apiGeneration "org.scala-lang:scala-library:${scalaVersion}" apiGeneration project(path: ':sparkling-water-api-generation', configuration: 'shadow') apiGeneration project(path: ':sparkling-water-assembly', configuration: 'shadow') @@ -223,12 +228,8 @@ def copyH2OFromH2OWheel(File h2oPythonWheel) { } } -// -// Make PySparkling distribution zip package -// -task distPython(type: Zip, dependsOn: [checkPythonEnv, configurations.sparklingWaterAssemblyJar]) { - - doFirst { +task createPkg(dependsOn: [checkPythonEnv, configurations.sparklingWaterSlimAssemblyJar]) { + doLast { def H2O_HOME = System.getenv("H2O_HOME") // if both properties are set, give precedence to H2O_HOME @@ -240,7 +241,7 @@ task distPython(type: Zip, dependsOn: [checkPythonEnv, configurations.sparklingW // Copy basic python setup copyPySetup() def replaceStr = - """ + """ import zipfile from os import path @@ -283,20 +284,43 @@ else: } // Copy sparkling water assembly jar - def fatJar = configurations.sparklingWaterAssemblyJar.singleFile + def fatJar = configurations.sparklingWaterSlimAssemblyJar.singleFile copy { from fatJar into file("${project.pkgDir}/sparkling_water") - rename ".*", "sparkling_water_assembly.jar" + rename ".*", "sparkling_water_slim_assembly.jar" } // Save comment into module file file("${project.pkgDir}/sparkling_water/").mkdir() file("${project.pkgDir}/sparkling_water/__init__.py").write("# Sparkling-water JAR holder for pySparkling module.") } +} + +task createDistSourceDir(type: Copy, dependsOn: createPkg) { + from pkgDir + into distSourceDir +} + +task distPython(type: Zip, dependsOn: [createDistSourceDir]) { + doFirst { + copy { + from configurations.sparklingWaterXgboostAssemblyJar.singleFile + into file("${distSourceDir}/sparkling_water_ext_xgboost") + rename ".*", "sparkling_water_ext_xgboost_assembly.jar" + } + file("${distSourceDir}/sparkling_water_ext_xgboost/__init__.py").write("# Sparkling-water JAR holder for pySparklingExtXgboost module.") + 
copy { + from configurations.sparklingWaterJythonAssemblyJar.singleFile + into file("${distSourceDir}/sparkling_water_ext_jython_cfunc") + rename ".*", "sparkling_water_ext_jython_cfunc_assembly.jar" + } + file("${distSourceDir}/sparkling_water_ext_jython_cfunc/__init__.py").write("# Sparkling-water JAR holder for pySparklingExtJythonCfunc module.") + } + // Configure proper name archiveBaseName = "h2o_pysparkling_${sparkMajorVersion}" - from pkgDir + from distSourceDir destinationDirectory = distDir } @@ -324,7 +348,7 @@ def createIntegTestArgs() { "spark.test.home=${sparkHome}", "spark.ext.h2o.log.dir=${buildDir}/h2ologs-itest", "spark.ext.h2o.external.disable.version.check=true", - "spark.ext.h2o.testing.path.to.sw.jar=${configurations.sparklingWaterAssemblyJar.singleFile}" + "spark.ext.h2o.testing.path.to.sw.jar=${configurations.sparklingWaterSlimAssemblyJar.singleFile}" ] } @@ -429,10 +453,10 @@ task generateConf(type: JavaExec, dependsOn: [cleanGeneratedApi, ':sparkling-wat clean.dependsOn cleanPython clean.dependsOn cleanGeneratedApi createVersionFile.dependsOn clean -distPython.dependsOn createVersionFile -distPython.dependsOn generateAlgorithmApi -distPython.dependsOn generateMOJOApi -distPython.dependsOn generateConf +createPkg.dependsOn createVersionFile +createPkg.dependsOn generateAlgorithmApi +createPkg.dependsOn generateMOJOApi +createPkg.dependsOn generateConf build.dependsOn distPython test.dependsOn testPython diff --git a/py/conda/h2o_pysparkling_SUBST_SPARK_MAJOR_VERSION/meta.yaml b/py/conda/h2o_pysparkling_SUBST_SPARK_MAJOR_VERSION/meta.yaml index c1cd0e1c8f..f7166b25b6 100644 --- a/py/conda/h2o_pysparkling_SUBST_SPARK_MAJOR_VERSION/meta.yaml +++ b/py/conda/h2o_pysparkling_SUBST_SPARK_MAJOR_VERSION/meta.yaml @@ -19,6 +19,8 @@ requirements: - future >=0.15.2 - tabulate >=0.7.5 - requests >=2.10 + - h2o_pysparkling_ext_xgboost == SUBST_SW_VERSION + - h2o_pysparkling_ext_jython_cfunc == SUBST_SW_VERSION about: home: 
https://github.com/h2oai/sparkling-water.git diff --git a/py/src/ai/h2o/sparkling/BackingJar.py b/py/src/ai/h2o/sparkling/BackingJar.py index f55909a291..c701b7ff67 100644 --- a/py/src/ai/h2o/sparkling/BackingJar.py +++ b/py/src/ai/h2o/sparkling/BackingJar.py @@ -18,10 +18,27 @@ class BackingJar(object): + def __init__(self, name, module): + self._name = name + self._module = module + + def getName(self): + return self._name + + def getModule(self): + return self._module + + def getRelativePath(self): + return self._module + "/" + self._name + @staticmethod - def getName(): - return "sparkling_water_assembly.jar" + def getMainBackingJar(): + return BackingJar.getBackingJars()[0] @staticmethod - def getRelativePath(): - return "sparkling_water/" + BackingJar.getName() + def getBackingJars(): + return [ + BackingJar(name = "sparkling_water_slim_assembly.jar", module = "sparkling_water"), + BackingJar(name = "sparkling_water_ext_xgboost_assembly.jar", module = "sparkling_water_ext_xgboost"), + BackingJar(name = "sparkling_water_ext_jython_cfunc_assembly.jar", module = "sparkling_water_ext_jython_cfunc") + ] diff --git a/py/src/setup.py b/py/src/setup.py index 7e02aab9be..bcf8f66150 100644 --- a/py/src/setup.py +++ b/py/src/setup.py @@ -41,7 +41,9 @@ install_requires=[ 'requests', 'tabulate', - 'future'], + 'future', + 'h2o_pysparkling_ext_xgboost==SUBST_SW_VERSION', + 'h2o_pysparkling_ext_jython_cfunc==SUBST_SW_VERSION'], # bundled binary packages package_data={'sparkling_water': ['*.jar'], diff --git a/settings.gradle b/settings.gradle index 25f9729db1..12a7a26134 100644 --- a/settings.gradle +++ b/settings.gradle @@ -6,8 +6,13 @@ include 'examples' include 'ml' include 'assembly' include 'assembly-scoring' +include 'assembly-slim' include 'assembly-extensions' +include 'assembly-ext-jython-cfunc' +include 'assembly-ext-xgboost' include 'py' +include 'py-ext-jython-cfunc' +include 'py-ext-xgboost' include 'py-scoring' include 'r' include 'r-cran'