Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
alexarchambault committed Jul 26, 2022
0 parents commit 3145f76
Show file tree
Hide file tree
Showing 8 changed files with 473 additions and 0 deletions.
53 changes: 53 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# CI workflow. Triggered by pushes to the "main" branch, pushes of tags matching "v*",
# and pull requests.
name: CI
on:
push:
branches:
- main
tags:
- "v*"
pull_request:

jobs:
# Job "test": checks out the repository, runs two setup actions, then runs the tests
# under src/ with scala-cli.
test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
# Per the actions/checkout documentation, a fetch-depth of 0 fetches the full history
# instead of a shallow clone.
fetch-depth: 0
# NOTE(review): "[email protected]" on the next two lines looks like an e-mail-obfuscation
# artifact that replaced the real "<action-name>@<version>" refs - restore them from the
# repository before using this file.
- uses: coursier/[email protected]
- uses: VirtusLab/[email protected]
- name: Test
run: scala-cli test src --cross

# publish:
# needs: test
# if: github.event_name == 'push'
# runs-on: ubuntu-latest
# steps:
# - uses: actions/checkout@v3
# with:
# fetch-depth: 0
# - uses: coursier/[email protected]
# - uses: VirtusLab/[email protected]
# - name: Publish
# run: scala-cli publish src --cross
# env:
# PUBLISH_USER: ${{ secrets.PUBLISH_USER }}
# PUBLISH_PASSWORD: ${{ secrets.PUBLISH_PASSWORD }}
# PUBLISH_SECRET_KEY: ${{ secrets.PUBLISH_SECRET_KEY }}
# PUBLISH_SECRET_KEY_PASSWORD: ${{ secrets.PUBLISH_SECRET_KEY_PASSWORD }}

# Job "package": depends on the "test" job and is restricted to push events.
# It runs Upload.scala with scala-cli, exposing the workflow's GITHUB_TOKEN secret to the
# script as UPLOAD_GH_TOKEN.
package:
needs: test
if: github.event_name == 'push'
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
# Per the actions/checkout documentation, a fetch-depth of 0 fetches the full history
# instead of a shallow clone.
fetch-depth: 0
# NOTE(review): "[email protected]" on the next two lines looks like an e-mail-obfuscation
# artifact that replaced the real "<action-name>@<version>" refs - restore them from the
# repository before using this file.
- uses: coursier/[email protected]
- uses: VirtusLab/[email protected]
- name: Create and upload archives
run: scala-cli run Upload.scala
env:
UPLOAD_GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
.bsp/
.scala-build/
26 changes: 26 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# lightweight-spark-distrib

*lightweight-spark-distrib* is a small application that makes Spark distributions more
lightweight. From an existing Spark distribution, *lightweight-spark-distrib* looks for the
JARs it contains and tries to find those on Maven Central. It then copies all files but the JARs
it found on Maven Central to a new directory, and writes alongside them a script that
relies on [coursier](https://github.com/coursier/coursier) to fetch the missing JARs.

The resulting Spark distributions are much more lightweight (~25 MB uncompressed / ~16 MB compressed)
than their original counterparts (which usually weigh more than 200 MB). As a consequence, the former
are easier to distribute, and benefit more easily from mechanisms such as CI caches.


## Generate a lightweight archive

```text
$ scala-cli run \
--workspace . \
src \
-- \
--dest spark-3.0.3-bin-hadoop2.7-lightweight.tgz \
https://archive.apache.org/dist/spark/spark-3.0.3/spark-3.0.3-bin-hadoop2.7.tgz \
--spark 3.0.3 \
--scala 2.12.10 \
--archive
```
41 changes: 41 additions & 0 deletions Upload.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
//> using scala "2.13"
//> using lib "io.github.alexarchambault.mill::mill-native-image-upload:0.1.21"
//> using lib "com.lihaoyi::os-lib:0.8.1"

/** Builds lightweight Spark archives for a fixed list of Scala / Spark versions and, when the
  * current commit is tagged, hands them to mill-native-image-upload for upload to the
  * `scala-cli/lightweight-spark-distrib` GitHub project.
  *
  * When no git tag points at HEAD, the archives are still generated but nothing is uploaded
  * ("dummy mode").
  */
object Upload {

  /** Runs the application under `src` through `scala-cli` to generate one archive.
    *
    * @param scalaVersion value passed to `--scala`
    * @param sparkVersion value passed to `--spark`
    * @param sourceUrl    URL of the original Spark distribution (positional argument)
    * @param dest         output path passed to `--dest`; `--force` is passed too, presumably so
    *                     that the already-existing temporary file can be overwritten (confirm
    *                     against the `src` application)
    */
  private def create(
    scalaVersion: String,
    sparkVersion: String,
    sourceUrl: String,
    dest: os.Path
  ): Unit =
    os.proc(
      "scala-cli", "run", "src", "--",
      "--force",
      "--dest", dest,
      "--archive",
      "--scala", scalaVersion,
      "--spark", sparkVersion,
      sourceUrl
    ).call(stdin = os.Inherit, stdout = os.Inherit)

  /** Scala version -> Spark version pairs to generate archives for. */
  private def versions = Seq(
    "2.12.15" -> "3.0.3",
    "2.12.15" -> "2.4.2"
  )

  /** Entry point: generates every archive, then uploads them if HEAD is tagged.
    *
    * Reads the GitHub token from the `UPLOAD_GH_TOKEN` environment variable; the token is only
    * required when an upload is actually going to happen.
    */
  def main(args: Array[String]): Unit = {
    // `git tag --points-at HEAD` prints one tag per line. If several tags point at the same
    // commit, the raw output is a multi-line string, which is not a usable release tag.
    // Keep only the first non-blank line that git lists.
    val tags = os.proc("git", "tag", "--points-at", "HEAD").call().out.trim()
      .linesIterator
      .map(_.trim)
      .filter(_.nonEmpty)
      .toVector
    val tag = tags.headOption.getOrElse("")
    if (tags.length > 1)
      System.err.println(
        s"Several git tags point at HEAD (${tags.mkString(", ")}), using $tag"
      )

    val dummy = tag.isEmpty
    if (dummy)
      System.err.println("Not on a git tag, running in dummy mode")

    // The token is mandatory only when uploading; dummy mode falls back to an empty string.
    val token = Option(System.getenv("UPLOAD_GH_TOKEN")).getOrElse {
      if (dummy) ""
      else sys.error("UPLOAD_GH_TOKEN not set")
    }

    // Each entry pairs the generated temporary file with the file name it is uploaded under.
    val files = versions.map {
      case (scalaVer, sparkVer) =>
        val url = s"https://archive.apache.org/dist/spark/spark-$sparkVer/spark-$sparkVer-bin-hadoop2.7.tgz"
        // Scala binary version, e.g. "2.12.15" -> "2.12"
        val sbv = scalaVer.split('.').take(2).mkString(".")
        val name = s"spark-$sparkVer-bin-hadoop2.7-scala$sbv"
        val dest = os.temp(prefix = name, suffix = ".tgz")
        create(scalaVer, sparkVer, url, dest)
        dest -> s"$name.tgz"
    }

    if (!dummy)
      io.github.alexarchambault.millnativeimage.upload.Upload.upload(
        ghOrg = "scala-cli",
        ghProj = "lightweight-spark-distrib",
        ghToken = token,
        tag = tag,
        dryRun = false,
        overwrite = true
      )(files: _*)
  }
}
Loading

0 comments on commit 3145f76

Please sign in to comment.