Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SW-2687] Move XGBoost and Jython libraries to separate Python artifacts #2728

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions assembly-ext-jython-cfunc/build.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// Build script of the shaded assembly that carries only the H2O Jython custom-function extension.
apply(plugin: 'java')
apply(plugin: 'com.github.johnrengelman.shadow')
apply(plugin: 'java-library')
// Shared assembly settings; this script supplies the shadowApi configuration extended below.
apply(from: "$rootDir/gradle/assembly.gradle")

description = "Sparkling Water Assembly Jython Extension (for pypi)"

dependencies {
    api "ai.h2o:h2o-ext-jython-cfunc:${h2oVersion}"
}

configurations {
    // Same content as shadowApi, additionally leaving out h2o-core.
    jythonShadowApi {
        extendsFrom(shadowApi)
        exclude(group: "ai.h2o", module: "h2o-core")
    }
}

shadowJar {
    // Only the narrowed configuration is packed into the shaded jar.
    configurations = [project.configurations.jythonShadowApi]
    mergeServiceFiles()

    // Suffix the artifact name with the Scala base version.
    archiveBaseName = "${archiveBaseName.get()}_${scalaBaseVersion}"
}

artifacts {
    api(shadowJar)
}

build.dependsOn(shadowJar)
34 changes: 34 additions & 0 deletions assembly-ext-xgboost/build.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Build script of the shaded assembly that carries only the H2O XGBoost extension.
apply(plugin: 'java')
apply(plugin: 'com.github.johnrengelman.shadow')
apply(plugin: 'java-library')
// Shared assembly settings; this script supplies the shadowApi configuration extended below.
apply(from: "$rootDir/gradle/assembly.gradle")

description = "Sparkling Water Assembly XGBoost Extension (for pypi)"

dependencies {
    api "ai.h2o:h2o-ext-xgboost:${h2oVersion}"
}

configurations {
    // Same content as shadowApi, additionally leaving out the listed H2O modules.
    xgboostShadowApi {
        extendsFrom(shadowApi)
        exclude(group: "ai.h2o", module: "h2o-core")
        exclude(group: "ai.h2o", module: "h2o-algos")
        exclude(group: "ai.h2o", module: "h2o-genmodel")
        exclude(group: "ai.h2o", module: "h2o-ext-steam")
    }
}

shadowJar {
    // Only the narrowed configuration is packed into the shaded jar.
    configurations = [project.configurations.xgboostShadowApi]
    mergeServiceFiles()

    // Move the bundled HTTP client classes under the ai.h2o prefix.
    relocate('org.apache.http', 'ai.h2o.org.apache.http')
    // Suffix the artifact name with the Scala base version.
    archiveBaseName = "${archiveBaseName.get()}_${scalaBaseVersion}"
}

artifacts {
    api(shadowJar)
}

build.dependsOn(shadowJar)
45 changes: 45 additions & 0 deletions assembly-slim/build.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
// Build script of the slim assembly: the Sparkling Water modules without the
// Jython-cfunc and XGBoost H2O extensions.
apply(plugin: 'java')
apply(plugin: 'com.github.johnrengelman.shadow')
apply(plugin: 'java-library')
// Shared assembly settings; this script supplies the shadowApi configuration extended below.
apply(from: "$rootDir/gradle/assembly.gradle")

description = "Sparkling Water Assembly Slim (for pypi)"

dependencies {
    api project(":sparkling-water-ml")
    api project(":sparkling-water-repl")
    api project(":sparkling-water-core")
    api project(":sparkling-water-extensions")
}

configurations {
    // Same content as shadowApi, minus the two extensions.
    slimShadowApi {
        extendsFrom(shadowApi)
        exclude(group: "ai.h2o", module: "h2o-ext-jython-cfunc")
        exclude(group: "ai.h2o", module: "h2o-ext-xgboost")
    }
}

shadowJar {
    // Only the narrowed configuration is packed into the shaded jar.
    configurations = [project.configurations.slimShadowApi]
    mergeServiceFiles()

    // Move bundled third-party packages under the ai.h2o prefix.
    // NOTE(review): the 'org.eclipse.jetty' rule presumably already matches the '.orbit'
    // sub-package - confirm before dropping the second rule.
    relocate('javassist', 'ai.h2o.javassist')
    relocate('com.google.common', 'ai.h2o.com.google.common')
    relocate('org.eclipse.jetty', 'ai.h2o.org.eclipse.jetty')
    relocate('org.eclipse.jetty.orbit', 'ai.h2o.org.eclipse.jetty.orbit')
    relocate('scala.compat.java8', 'ai.h2o.scala.compat.java8')
    relocate('scala.concurrent.java8', 'ai.h2o.scala.concurrent.java8')
    relocate('com.amazonaws', 'ai.h2o.com.amazonaws')
    relocate('org.apache.http', 'ai.h2o.org.apache.http')
    from "$project.buildDir/reports/" include '**/*'
    exclude('www/flow/packs/test-*/**')

    // Suffix the artifact name with the Scala base version.
    archiveBaseName = "${archiveBaseName.get()}_${scalaBaseVersion}"
}

artifacts {
    api(shadowJar)
}

build.dependsOn(shadowJar)
36 changes: 1 addition & 35 deletions assembly/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ apply plugin: 'java'
apply plugin: 'com.github.johnrengelman.shadow'
apply plugin: 'com.github.jk1.dependency-license-report'
apply plugin: 'java-library'
apply from: "$rootDir/gradle/assembly.gradle"

import com.github.jk1.license.filter.*
import com.github.jk1.license.render.*
Expand All @@ -16,41 +17,6 @@ dependencies {
api(project(":sparkling-water-extensions"))
}

//
// Contains runtime configuration for Sparkling Water fat jar. This configuration transitively removes dependencies provided by
// Apache Spark or the Hadoop ecosystem.
//
configurations {
shadowApi {
extendsFrom api
exclude group: 'org.apache.spark'
exclude group: 'org.apache.hadoop'
exclude group: 'org.scala-lang'
exclude group: "javax.servlet", module: "servlet-api"
exclude group: 'org.apache.commons', module: 'commons-math3' // a dependency of org.apache.spark:spark-core_2.11
exclude group: 'org.codehaus.jackson', module: 'jackson-core-asl'
// a dependency of org.apache.spark:spark-core_2.11
exclude group: 'commons-codec', module: 'commons-codec' // a dependency of org.apache.spark:spark-sql_2.11
exclude group: 'commons-lang', module: 'commons-lang' // a dependency of org.apache.spark:spark-core_2.11
exclude group: 'commons-io', module: 'commons-io' // a dependency of org.apache.spark:spark-core_2.11
exclude group: 'commons-logging', module: 'commons-logging' // a dependency of org.apache.hadoop:hadoop-auth
exclude group: 'log4j', module: 'log4j' // a dependency of org.apache.hadoop:hadoop-auth
exclude group: 'com.google.protobuf' // a dependency of org.apache.hadoop:hadoop-common
exclude group: 'com.fasterxml.jackson.core', module: 'jackson-core'
// a dependency of org.apache.spark:spark-sql_2.11
exclude group: 'com.github.rwl', module: 'jtransforms' // a dependency of org.apache.spark:spark-mllib
exclude group: 'com.google.code.findbugs', module: 'jsr305' // a dependency of org.apache.hadoop:hadoop-common
exclude group: 'javax.xml.bind', module: 'jaxb-api' // a dependency of org.apache.hadoop:hadoop-yarn-common
exclude group: 'net.sourceforge.f2j', module: 'arpack_combined_all'
// a dependency of org.apache.spark:spark-graphx_2.11
}
}

jar {
enabled = false // we do not need to generate jar file in this case
archiveBaseName = "${jar.archiveBaseName.get()}_${scalaBaseVersion}"
}

shadowJar {
configurations = [project.configurations.shadowApi]
mergeServiceFiles()
Expand Down
7 changes: 6 additions & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,12 @@ ext {
project(':sparkling-water-doc'),
project(':sparkling-water-booklet')
]
pythonProjects = [project(':sparkling-water-py'), project(':sparkling-water-py-scoring')]
pythonProjects = [
project(':sparkling-water-py'),
project(':sparkling-water-py-ext-jython-cfunc'),
project(':sparkling-water-py-ext-xgboost'),
project(':sparkling-water-py-scoring')
]
rProjects = [project(':sparkling-water-r'), project(':sparkling-water-r-cran')]
docProjects = [project(':sparkling-water-doc')]

Expand Down
34 changes: 34 additions & 0 deletions gradle/assembly.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// The regular jar task is switched off; its base name still gets the Scala base version suffix.
jar {
    archiveBaseName = "${jar.archiveBaseName.get()}_${scalaBaseVersion}"
    enabled = false // we do not need to generate jar file in this case
}

//
// Runtime configuration for the Sparkling Water fat jar. It extends `api` and transitively
// removes dependencies that are provided by Apache Spark or the Hadoop ecosystem.
//
configurations {
    shadowApi {
        extendsFrom(api)

        // Whole groups
        exclude(group: 'org.apache.spark')
        exclude(group: 'org.apache.hadoop')
        exclude(group: 'org.scala-lang')
        // a dependency of org.apache.hadoop:hadoop-common
        exclude(group: 'com.google.protobuf')

        // Single modules
        exclude(group: "javax.servlet", module: "servlet-api")
        // a dependency of org.apache.spark:spark-core_2.11
        exclude(group: 'org.apache.commons', module: 'commons-math3')
        // a dependency of org.apache.spark:spark-core_2.11
        exclude(group: 'org.codehaus.jackson', module: 'jackson-core-asl')
        // a dependency of org.apache.spark:spark-sql_2.11
        exclude(group: 'commons-codec', module: 'commons-codec')
        // a dependency of org.apache.spark:spark-core_2.11
        exclude(group: 'commons-lang', module: 'commons-lang')
        // a dependency of org.apache.spark:spark-core_2.11
        exclude(group: 'commons-io', module: 'commons-io')
        // a dependency of org.apache.hadoop:hadoop-auth
        exclude(group: 'commons-logging', module: 'commons-logging')
        // a dependency of org.apache.hadoop:hadoop-auth
        exclude(group: 'log4j', module: 'log4j')
        // a dependency of org.apache.spark:spark-sql_2.11
        exclude(group: 'com.fasterxml.jackson.core', module: 'jackson-core')
        // a dependency of org.apache.spark:spark-mllib
        exclude(group: 'com.github.rwl', module: 'jtransforms')
        // a dependency of org.apache.hadoop:hadoop-common
        exclude(group: 'com.google.code.findbugs', module: 'jsr305')
        // a dependency of org.apache.hadoop:hadoop-yarn-common
        exclude(group: 'javax.xml.bind', module: 'jaxb-api')
        // a dependency of org.apache.spark:spark-graphx_2.11
        exclude(group: 'net.sourceforge.f2j', module: 'arpack_combined_all')
    }
}
21 changes: 21 additions & 0 deletions py-ext-jython-cfunc/README.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
PySparkling Jython-cfunc Extension
==================================

This package contains the Jython library and other dependencies needed for the execution of custom metrics.

|Join the chat at https://gitter.im/h2oai/sparkling-water| |License| |Powered by H2O.ai|

PySparkling Documentation is hosted at our documentation page:

- For Spark 3.1 - http://docs.h2o.ai/sparkling-water/3.1/latest-stable/doc/pysparkling.html
- For Spark 3.0 - http://docs.h2o.ai/sparkling-water/3.0/latest-stable/doc/pysparkling.html
- For Spark 2.4 - http://docs.h2o.ai/sparkling-water/2.4/latest-stable/doc/pysparkling.html
- For Spark 2.3 - http://docs.h2o.ai/sparkling-water/2.3/latest-stable/doc/pysparkling.html
- For Spark 2.2 - http://docs.h2o.ai/sparkling-water/2.2/latest-stable/doc/pysparkling.html

.. |Join the chat at https://gitter.im/h2oai/sparkling-water| image:: https://badges.gitter.im/Join%20Chat.svg
:target: https://gitter.im/h2oai/sparkling-water?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge
.. |License| image:: https://img.shields.io/badge/License-Apache%202-blue.svg
:target: LICENSE
.. |Powered by H2O.ai| image:: https://img.shields.io/badge/powered%20by-h2oai-yellow.svg
:target: https://github.com/h2oai/
133 changes: 133 additions & 0 deletions py-ext-jython-cfunc/build.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
// Describes this project: the Python package that ships the Jython-cfunc extension jar
// (the previous text was the description of the scoring package).
description = "PySparklingExtJythonCfunc - Sparkling Water Python Jython-cfunc Extension Package"

apply from: "$rootDir/gradle/utils.gradle"
apply plugin: 'ru.vyarus.use-python'

import ru.vyarus.gradle.plugin.python.cmd.Python

// Returns the version reported by a Python command wrapper created from the
// path and binary currently configured on the `python` extension.
def getPythonVersion() {
    def interpreter = new Python(project, python.getPythonPath(), python.getPythonBinary())
    interpreter.version
}

// Extra project properties shared by the packaging code in this script.
ext {
// Platform file and path separators.
FS = File.separator
FPS = File.pathSeparator
// Value of the "pythonBinary" project property when it is set, otherwise "python".
pythonBinary = findProperty("pythonBinary") ?: "python"
// Staging directory of the Python package (setup.py, README.rst, module directory).
pkgDir = file("$buildDir/pkg")
distDir = file("$buildDir/dist")
// Staging directory of the conda recipe files.
condaDir = file("$buildDir/conda/h2o_pysparkling_ext_jython_cfunc")
}

// Define the environment required to run tests
python {
// Optional overrides of the interpreter binary and path, taken from project properties.
// They are applied first because getPythonVersion() below reads them back from this extension.
if (project.hasProperty("pythonBinary")) {
pythonBinary project.findProperty("pythonBinary").toString()
}
if (project.hasProperty("pythonPath")) {
pythonPath project.findProperty("pythonPath").toString()
}

// The environment path is keyed by Python version and Spark version; only its base directory differs.
if (project.hasProperty("pythonEnvBasePath")) {
// for CI as we use pre-cached environment
envPath "${project.findProperty("pythonEnvBasePath")}/${getPythonVersion()}/${sparkVersion}"
} else {
// Default location under the root project's .gradle directory.
envPath "${rootDir}/.gradle/python/${getPythonVersion()}/${sparkVersion}"
}
}

// Dedicated configuration through which the shaded Jython extension jar is obtained.
configurations {
    sparklingWaterAssemblyJar
}

dependencies {
    // Consume the 'shadow' configuration of the Jython extension assembly project.
    sparklingWaterAssemblyJar(
        project(path: ':sparkling-water-assembly-ext-jython-cfunc', configuration: 'shadow')
    )
}

// Writes the project version into version.txt inside the package's module directory.
task createVersionFile {
    doLast {
        def moduleDir = new File(pkgDir, "sparkling_water_ext_jython_cfunc")
        // mkdirs() does nothing when the directory is already present.
        moduleDir.mkdirs()

        def versionText = version
        new File(moduleDir, "version.txt").write(versionText)
    }
}

// Stages the packaging inputs: setup.py, README.rst, MANIFEST.in and setup.cfg go to pkgDir,
// the conda recipe files go to condaDir. Placeholders are substituted while copying.
def copyPySetup() {
    // setup.py: SUBST_SW_VERSION becomes the version with everything from its last dash dropped
    // and the remaining dashes turned into underscores; SUBST_PYTHON_VERSIONS is filled from
    // the supported Python environments.
    copy {
        from("$projectDir/src") {
            include('setup.py')
        }
        filter { line ->
            line.replaceAll("SUBST_SW_VERSION", version.substring(0, version.lastIndexOf("-")).replace("-", "_"))
                .replaceAll("SUBST_PYTHON_VERSIONS", convertSupportedPythonVersionsToPyPiDefinitions(pythonEnvironments))
        }
        into(pkgDir)
    }

    // README.rst is taken from the project directory unchanged.
    copy {
        from("$projectDir") {
            include('README.rst')
        }
        into(pkgDir)
    }

    // Packaging metadata files are copied unchanged.
    copy {
        from("$projectDir/src") {
            include('MANIFEST.in')
            include('setup.cfg')
            exclude('**/*.pyc')
        }
        into(pkgDir)
    }

    // conda build scripts are copied unchanged.
    copy {
        from("$projectDir/conda/h2o_pysparkling_ext_jython_cfunc") {
            include('bld.bat')
            include('build.sh')
        }
        into(condaDir)
    }

    // conda recipe: only the version placeholder is substituted.
    copy {
        from("$projectDir/conda/h2o_pysparkling_ext_jython_cfunc") {
            include('meta.yaml')
        }
        filter { line ->
            line.replaceAll("SUBST_SW_VERSION", version.substring(0, version.lastIndexOf("-")).replace("-", "_"))
        }
        into(condaDir)
    }
}

// Assembles the Python package directory: packaging files, the shaded Jython extension jar
// and the module's __init__.py.
task createPkg(dependsOn: configurations.sparklingWaterAssemblyJar) {
    // All work is registered as a task action. Without doLast the statements would run while
    // the build is being configured, i.e. on every Gradle invocation, before the task's
    // dependencies have produced the jar and before `clean` has had a chance to run.
    doLast {
        copyPySetup()

        def moduleDir = file("${project.pkgDir}/sparkling_water_ext_jython_cfunc")

        // Copy sparkling water assembly jar under a fixed file name
        def fatJar = configurations.sparklingWaterAssemblyJar.singleFile
        copy {
            from fatJar
            into moduleDir
            rename ".*", "sparkling_water_ext_jython_cfunc_assembly.jar"
        }

        // Save comment into module file
        moduleDir.mkdirs()
        new File(moduleDir, "__init__.py").write("# Sparkling-water JAR holder for pySparklingExtJythonCfunc module.")
    }
}

//
// Cleanup: removes this project's whole build directory
//
task cleanPython(type: Delete) {
    delete(getBuildDir())
}

//
// Setup execution graph:
//   clean -> cleanPython
//   build -> createPkg -> createVersionFile
//
clean.dependsOn(cleanPython)
createPkg.dependsOn(createVersionFile)
build.dependsOn(createPkg)
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
REM Install the package with the interpreter given by %PYTHON%; the installed files are listed in record.txt.
%PYTHON% setup.py install --single-version-externally-managed --record=record.txt
REM Stop with exit code 1 when the install command reported an error level of 1 or higher.
if errorlevel 1 exit 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#!/bin/bash
# Install the package with the interpreter named by $PYTHON; the installed files are listed in record.txt.
# The variable is quoted so that an interpreter path containing spaces stays a single word.
"$PYTHON" setup.py install --single-version-externally-managed --record=record.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# conda recipe of the h2o_pysparkling_ext_jython_cfunc package.
package:
name: h2o_pysparkling_ext_jython_cfunc
# Placeholder; the Gradle build substitutes the real version when it copies this file.
version: SUBST_SW_VERSION

source:
# Relative to the staged recipe directory (build/conda/h2o_pysparkling_ext_jython_cfunc),
# this points at the staged Python package in build/pkg.
path: ../../pkg

requirements:
build:
- python
- pip >=9.0.1
- setuptools

run:
- python

about:
home: https://github.com/h2oai/sparkling-water.git
license: Apache License Version 2.0
license_family: Apache
Loading