Skip to content

Commit

Permalink
working example
Browse files Browse the repository at this point in the history
  • Loading branch information
LiorF-BDBQ committed Mar 14, 2023
0 parents commit 0e0a4ba
Show file tree
Hide file tree
Showing 13 changed files with 685 additions and 0 deletions.
24 changes: 24 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
.gradle
**/build/
!src/**/build/

# Ignore Gradle GUI config
gradle-app.setting

# Avoid ignoring Gradle wrapper jar file (.jar files are usually ignored)
!gradle-wrapper.jar

# Avoid ignore Gradle wrappper properties
!gradle-wrapper.properties

# Cache of project
.gradletasknamecache

# Eclipse Gradle plugin generated files
# Eclipse Core
.project
# JDT-specific (Eclipse Java Development Tools)
.classpath

.idea/
.env
6 changes: 6 additions & 0 deletions README
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
A Flink application project using Java and Gradle.

To package your job for submission to Flink, use: 'gradle shadowJar'. Afterwards, you'll find the
jar to use in the 'build/libs' folder.

To run and test your application with an embedded instance of Flink use: 'gradle run'
149 changes: 149 additions & 0 deletions build.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
buildscript {
ext.kotlin_version = '1.6.10'
dependencies {
classpath "org.jetbrains.kotlin:kotlin-gradle-plugin:$kotlin_version"
classpath "org.jetbrains.kotlin:kotlin-serialization:$kotlin_version"
}
}

plugins {
id 'java'
id 'application'
// shadow plugin to produce fat JARs
id "com.github.johnrengelman.shadow" version '7.1.2'
}

ext {
javaVersion = '11'
flinkVersion = '1.16.0'
flinkShortVersion = '1.16'
scalaBinaryVersion = '2.12'
slf4jVersion = '1.7.36'
log4jVersion = '2.17.1'
hadoopVersion = "3.3.2"
icebergVersion = '1.1.0'
jacksonVersion = '2.14.2'
awsSdkVersion = '2.19.29'
}

apply plugin: "kotlin"
apply plugin: 'kotlinx-serialization'

apply plugin: 'com.github.johnrengelman.shadow'


// artifact properties
group = 'com.bigdataboutique'
version = '0.1-SNAPSHOT'
mainClassName = 'com.bigdataboutique.Main'
description = """Flink Iceberg Append Job Blueprint"""

sourceCompatibility = javaVersion
targetCompatibility = javaVersion
tasks.withType(JavaCompile) {
options.encoding = 'UTF-8'
}

tasks.test {
useJUnitPlatform()
testLogging {
events = ["passed", "skipped", "failed"]
}
}

applicationDefaultJvmArgs = ["-Dlog4j.configurationFile=log4j2.properties"]

// declare where to find the dependencies of your project
repositories {
mavenCentral()
// maven {
// url "https://repository.apache.org/content/repositories/snapshots"
// mavenContent {
// snapshotsOnly()
// }
// }
}

// NOTE: We cannot use "compileOnly" or "shadow" configurations since then we could not run code
// in the IDE or with "gradle run". We also cannot exclude transitive dependencies from the
// shadowJar yet (see https://github.com/johnrengelman/shadow/issues/159).
// -> Explicitly define the // libraries we want to be included in the "flinkShadowJar" configuration!
configurations {
flinkShadowJar // dependencies which go into the shadowJar
// always exclude these (also from transitive dependencies) since they are provided by Flink
flinkShadowJar.exclude group: 'org.apache.flink', module: 'force-shading'
flinkShadowJar.exclude group: 'com.google.code.findbugs', module: 'jsr305'
flinkShadowJar.exclude group: 'org.slf4j'
flinkShadowJar.exclude group: 'org.apache.logging.log4j'
}

// declare the dependencies for your production and test code
dependencies {
// Since we use kotlin
implementation "org.jetbrains.kotlin:kotlin-stdlib-jdk8:$kotlin_version"
flinkShadowJar "org.jetbrains.kotlin:kotlin-stdlib-jdk8:$kotlin_version"
implementation "org.jetbrains.kotlinx:kotlinx-serialization-runtime:1.0-M1-1.4.0-rc"
flinkShadowJar "org.jetbrains.kotlinx:kotlinx-serialization-runtime:1.0-M1-1.4.0-rc"
// --------------------------------------------------------------
// Compile-time dependencies that should NOT be part of the
// shadow jar and are provided in the lib folder of Flink
// --------------------------------------------------------------
implementation "org.apache.flink:flink-streaming-java:${flinkVersion}"
implementation "org.apache.flink:flink-clients:${flinkVersion}"
implementation "org.apache.flink:flink-table:${flinkVersion}"
implementation "org.apache.flink:flink-connector-kafka:${flinkVersion}"
implementation "org.apache.flink:flink-runtime-web:${flinkVersion}"

// For running in debug
implementation "org.apache.flink:flink-table-runtime:${flinkVersion}"
implementation "org.apache.flink:flink-table-planner-loader:${flinkVersion}"

// Hadoop and iceberg dependencies
implementation "org.apache.hadoop:hadoop-common:${hadoopVersion}"
flinkShadowJar "org.apache.hadoop:hadoop-common:${hadoopVersion}"
implementation "org.apache.iceberg:iceberg-flink:${icebergVersion}"
flinkShadowJar "org.apache.iceberg:iceberg-flink:${icebergVersion}"
implementation "org.apache.iceberg:iceberg-flink-runtime-${flinkShortVersion}:${icebergVersion}"
flinkShadowJar "org.apache.iceberg:iceberg-flink-runtime-${flinkShortVersion}:${icebergVersion}"

// AWS dependencies.. jar is huge if we just use "bundle"..
implementation "software.amazon.awssdk:glue:${awsSdkVersion}"
flinkShadowJar "software.amazon.awssdk:glue:${awsSdkVersion}"
implementation "software.amazon.awssdk:s3:${awsSdkVersion}"
flinkShadowJar "software.amazon.awssdk:s3:${awsSdkVersion}"
implementation "software.amazon.awssdk:sts:${awsSdkVersion}"
flinkShadowJar "software.amazon.awssdk:sts:${awsSdkVersion}"
implementation "software.amazon.awssdk:dynamodb:${awsSdkVersion}"
flinkShadowJar "software.amazon.awssdk:dynamodb:${awsSdkVersion}"
implementation "software.amazon.awssdk:kms:${awsSdkVersion}"
flinkShadowJar "software.amazon.awssdk:kms:${awsSdkVersion}"
implementation "software.amazon.awssdk:url-connection-client:${awsSdkVersion}"
flinkShadowJar "software.amazon.awssdk:url-connection-client:${awsSdkVersion}"

runtimeOnly "org.apache.logging.log4j:log4j-slf4j-impl:${log4jVersion}"
runtimeOnly "org.apache.logging.log4j:log4j-api:${log4jVersion}"
runtimeOnly "org.apache.logging.log4j:log4j-core:${log4jVersion}"

// Add test dependencies here.
testImplementation(platform("org.junit:junit-bom:5.9.2"))
testImplementation 'org.junit.jupiter:junit-jupiter:5.9.2'
}

// make compileOnly dependencies available for tests:
sourceSets {
main.java.srcDirs += 'src/main/kotlin/'
test.java.srcDirs += 'src/test/kotlin/'
}

run.classpath = sourceSets.main.runtimeClasspath

jar {
manifest {
attributes 'Built-By': System.getProperty('user.name'),
'Build-Jdk': System.getProperty('java.version')
}
}

shadowJar {
configurations = [project.configurations.flinkShadowJar]
}
26 changes: 26 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
version: "3.5"
services:
jobmanager:
image: flink:1.16.0-scala_2.12
ports:
- "8081:8081"
command: jobmanager
env_file:
- .env
environment:
- |
FLINK_PROPERTIES=
jobmanager.rpc.address: jobmanager
taskmanager:
image: flink:1.16.0-scala_2.12
depends_on:
- jobmanager
command: taskmanager
env_file:
- .env
environment:
- |
FLINK_PROPERTIES=
jobmanager.rpc.address: jobmanager
taskmanager.numberOfTaskSlots: 2
1 change: 1 addition & 0 deletions gradle.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
org.gradle.jvmargs=-Xmx4096M
Binary file added gradle/wrapper/gradle-wrapper.jar
Binary file not shown.
6 changes: 6 additions & 0 deletions gradle/wrapper/gradle-wrapper.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-7.4-bin.zip
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists
org.gradle.jvmargs=-Xmx4096M
Loading

0 comments on commit 0e0a4ba

Please sign in to comment.