From 03c3b602d50ee9547f6bafba0fc7a501e2bd11af Mon Sep 17 00:00:00 2001 From: Kumar Gauraw Date: Wed, 18 Jan 2023 16:16:32 +0530 Subject: [PATCH 1/6] Issue #000 feat: S3 presto dependency added --- jobs-distribution/Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/jobs-distribution/Dockerfile b/jobs-distribution/Dockerfile index 4fbb6b576..d1af55442 100644 --- a/jobs-distribution/Dockerfile +++ b/jobs-distribution/Dockerfile @@ -7,6 +7,8 @@ RUN apt-get install -y imagemagick COPY target/jobs-distribution-1.0.tar.gz /tmp USER flink RUN tar -xvf /tmp/jobs-distribution-1.0.tar.gz -C $FLINK_HOME/lib/ +RUN mkdir $FLINK_HOME/plugins/s3-fs-presto +RUN cp $FLINK_HOME/opt/flink-s3-fs-presto-1.13.5.jar $FLINK_HOME/plugins/s3-fs-presto/ USER root RUN rm -f /tmp/jobs-distribution-1.0.tar.gz USER flink From e154fa8d8bee3473927abc252ff528a5d5654ace Mon Sep 17 00:00:00 2001 From: Kumar Gauraw Date: Fri, 4 Aug 2023 16:19:11 +0530 Subject: [PATCH 2/6] Issue #ED-2072 feat: changes done for oci --- jobs-core/pom.xml | 2 +- jobs-distribution/Dockerfile | 3 +-- .../templates/flink_job_deployment.yaml | 18 ++++++++++++++- .../helm_charts/datapipeline_jobs/values.j2 | 23 +++++++++++++++++++ 4 files changed, 42 insertions(+), 4 deletions(-) diff --git a/jobs-core/pom.xml b/jobs-core/pom.xml index 80318a60f..eae620fbb 100644 --- a/jobs-core/pom.xml +++ b/jobs-core/pom.xml @@ -81,7 +81,7 @@ org.sunbird cloud-store-sdk_2.12 - 1.4.3 + 1.4.6 log4j diff --git a/jobs-distribution/Dockerfile b/jobs-distribution/Dockerfile index d1af55442..cc5b88af4 100644 --- a/jobs-distribution/Dockerfile +++ b/jobs-distribution/Dockerfile @@ -7,8 +7,7 @@ RUN apt-get install -y imagemagick COPY target/jobs-distribution-1.0.tar.gz /tmp USER flink RUN tar -xvf /tmp/jobs-distribution-1.0.tar.gz -C $FLINK_HOME/lib/ -RUN mkdir $FLINK_HOME/plugins/s3-fs-presto -RUN cp $FLINK_HOME/opt/flink-s3-fs-presto-1.13.5.jar $FLINK_HOME/plugins/s3-fs-presto/ +RUN cp $FLINK_HOME/opt/flink-s3-fs-presto-1.13.5.jar 
$FLINK_HOME/lib/flink-aaa-s3-fs-presto-1.13.5.jar USER root RUN rm -f /tmp/jobs-distribution-1.0.tar.gz USER flink diff --git a/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml b/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml index ebd851a45..15a12e1c9 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml +++ b/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml @@ -109,8 +109,16 @@ spec: workingDir: /opt/flink command: ["/opt/flink/bin/standalone-job.sh"] args: ["start-foreground", - "--job-classname={{ .Values.job_classname }}", + "--job-classname={{ .Values.job_classname }}", + {{- if eq .Values.csp "oci" }} + "-Dpresto.s3.access-key={{ .Values.s3_access_key}}", + "-Dpresto.s3.secret-key={{ .Values.s3_secret_key }}", + "-Dpresto.s3.endpoint={{ .Values.s3_endpoint }}", + "-Dpresto.s3.region={{ .Values.s3_region }}", + "-Dpresto.s3.path-style-access={{ .Values.s3_path_style_access }}", + {{- else }} "-Dfs.azure.account.key.{{ .Values.azure_account }}.blob.core.windows.net={{ .Values.azure_secret }}", + {{- end}} "-Dweb.submit.enable=false", "-Dmetrics.reporter.prom.class=org.apache.flink.metrics.prometheus.PrometheusReporter", "-Dmetrics.reporter.prom.port={{ .Values.jobmanager.prom_port }}", @@ -183,7 +191,15 @@ spec: workingDir: {{ .Values.taskmanager.flink_work_dir }} command: ["/opt/flink/bin/taskmanager.sh"] args: ["start-foreground", + {{- if eq .Values.csp "oci"}} + "-Dpresto.s3.access.key={{ .Values.s3_access_key}}", + "-Dpresto.s3.secret.key={{ .Values.s3_secret_key }}", + "-Dpresto.s3.endpoint={{ .Values.s3_endpoint }}", + "-Dpresto.s3.region={{ .Values.s3_region }}", + "-Dpresto.s3.path.style.access={{ .Values.s3_path_style_access }}", + {{- else}} "-Dfs.azure.account.key.{{ .Values.azure_account }}.blob.core.windows.net={{ .Values.azure_secret }}", + {{- end}} "-Dweb.submit.enable=false", 
"-Dmetrics.reporter.prom.class=org.apache.flink.metrics.prometheus.PrometheusReporter", "-Dmetrics.reporter.prom.host={{ .Release.Name }}-taskmanager", diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index ced29ea08..ee108464a 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -5,6 +5,16 @@ repository: {{flink_repository|default('data-pipeline')}} image_tag: {{ image_tag }} azure_account: {{ azure_account }} azure_secret: {{ azure_secret }} +csp: {{cloud_service_provider}} +s3_access_key: {{ cloud_public_storage_accountname }} +s3_secret_key: {{cloud_public_storage_secret}} +{% if cloud_service_provider == "oci" %} +s3_endpoint: {{oci_flink_s3_storage_endpoint}} +s3_region: {{s3_region}} +s3_path_style_access: true +{% else %} +s3_endpoint: {{cloud_public_storage_endpoint}} +{% endif %} serviceMonitor: enabled: {{ service_monitor_enabled | lower}} @@ -74,6 +84,18 @@ base_config: | job { env = "{{ env_name }}" enable.distributed.checkpointing = true + {% if cloud_service_provider == "oci" %} + statebackend { + s3 { + storage { + endpoint = "{{ oci_flink_s3_storage_endpoint }}" + container = "{{ flink_container_name }}" + checkpointing.dir = "checkpoint" + } + } + base.url = "s3://"${job.statebackend.s3.storage.container}"/"${job.statebackend.s3.storage.checkpointing.dir} + } + {% else %} statebackend { blob { storage { @@ -84,6 +106,7 @@ base_config: | } base.url = "wasbs://"${job.statebackend.blob.storage.container}"@"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.checkpointing.dir} } + {% endif %} } task { parallelism = 1 From e4352ae524cc7c88e34f569a9d588f6e5b0f413b Mon Sep 17 00:00:00 2001 From: Kumar Gauraw Date: Tue, 8 Aug 2023 18:07:25 +0530 Subject: [PATCH 3/6] Issue #IQ-524 feat: changes for cloud storage --- .circleci/config.yml | 2 +- jobs-core/pom.xml | 6 +++--- 
kubernetes/pipelines/build/Jenkinsfile | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 21da9deeb..88183a097 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -19,7 +19,7 @@ jobs: - run: name: Execute coverage report command: | - mvn clean scoverage:report + mvn clean scoverage:report -DCLOUD_STORE_GROUP_ID=$CLOUD_STORE_GROUP_ID -DCLOUD_STORE_ARTIFACT_ID=$CLOUD_STORE_ARTIFACT_ID -DCLOUD_STORE_VERSION=$CLOUD_STORE_VERSION - run: name: Save test results command: | diff --git a/jobs-core/pom.xml b/jobs-core/pom.xml index eae620fbb..300bfd582 100644 --- a/jobs-core/pom.xml +++ b/jobs-core/pom.xml @@ -79,9 +79,9 @@ 0.1.1 - org.sunbird - cloud-store-sdk_2.12 - 1.4.6 + ${CLOUD_STORE_GROUP_ID} + ${CLOUD_STORE_ARTIFACT_ID} + ${CLOUD_STORE_VERSION} log4j diff --git a/kubernetes/pipelines/build/Jenkinsfile b/kubernetes/pipelines/build/Jenkinsfile index 33f4b67dc..dc932cab0 100644 --- a/kubernetes/pipelines/build/Jenkinsfile +++ b/kubernetes/pipelines/build/Jenkinsfile @@ -26,7 +26,7 @@ node('build-slave') { env.NODE_ENV = "build" print "Environment will be : ${env.NODE_ENV}" sh '/opt/apache-maven-3.6.3/bin/mvn3.6 -v' - sh '/opt/apache-maven-3.6.3/bin/mvn3.6 clean install -DskipTests' + sh '/opt/apache-maven-3.6.3/bin/mvn3.6 clean install -DskipTests -DCLOUD_STORE_GROUP_ID=${params.CLOUD_STORE_GROUP_ID} -DCLOUD_STORE_ARTIFACT_ID=${params.CLOUD_STORE_ARTIFACT_ID} -DCLOUD_STORE_VERSION=${params.CLOUD_STORE_VERSION}' } From 4ba4c62e81a4e7a2b2e13d17b20bfdc52c1edbb0 Mon Sep 17 00:00:00 2001 From: Kumar Gauraw Date: Tue, 8 Aug 2023 19:08:29 +0530 Subject: [PATCH 4/6] Issue #IQ-524 fix: fix for imagemagic --- .circleci/config.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.circleci/config.yml b/.circleci/config.yml index 88183a097..908ed3c45 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -14,6 +14,8 @@ jobs: - run: name: Installation of imagemagick command: | + sudo sed -i -e 
's/deb.debian.org/archive.debian.org/g' -e 's|security.debian.org|archive.debian.org/|g' -e '/stretch-updates/d' /etc/apt/sources.list + sudo rm /etc/apt/sources.list.d/stretch-backports.list sudo apt-get update || sudo apt-get update sudo apt-get install -y imagemagick - run: From 73499395f641ad551170e4fc585301cc4b522121 Mon Sep 17 00:00:00 2001 From: Kumar Gauraw Date: Wed, 9 Aug 2023 16:24:07 +0530 Subject: [PATCH 5/6] Issue #524 fix: updated test cases --- jobs-core/src/test/scala/org/sunbird/spec/FileUtilsSpec.scala | 2 +- jobs-core/src/test/scala/org/sunbird/spec/HTTPUtilSpec.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/jobs-core/src/test/scala/org/sunbird/spec/FileUtilsSpec.scala b/jobs-core/src/test/scala/org/sunbird/spec/FileUtilsSpec.scala index de4a7682d..35dba027d 100644 --- a/jobs-core/src/test/scala/org/sunbird/spec/FileUtilsSpec.scala +++ b/jobs-core/src/test/scala/org/sunbird/spec/FileUtilsSpec.scala @@ -13,7 +13,7 @@ class FileUtilsSpec extends FlatSpec with Matchers { } "downloadFile " should " download the media source file starting with http or https " in { - val fileUrl: String = "https://preprodall.blob.core.windows.net/ntp-content-preprod/content/do_21273718766395392014320/artifact/book-image_1554832478631.jpg" + val fileUrl: String = "https://sunbirddevbbpublic.blob.core.windows.net/sunbird-content-staging/content/assets/do_2137327580080128001217/gateway-of-india.jpg" val downloadedFile: File = FileUtils.downloadFile(fileUrl, "/tmp/contentBundle") assert(downloadedFile.exists()) } diff --git a/jobs-core/src/test/scala/org/sunbird/spec/HTTPUtilSpec.scala b/jobs-core/src/test/scala/org/sunbird/spec/HTTPUtilSpec.scala index 861f7d663..60c83fb61 100644 --- a/jobs-core/src/test/scala/org/sunbird/spec/HTTPUtilSpec.scala +++ b/jobs-core/src/test/scala/org/sunbird/spec/HTTPUtilSpec.scala @@ -56,7 +56,7 @@ class HTTPUtilSpec extends FlatSpec with Matchers { } "downloadFile" should "download file with lower case name" in 
{ - val fileUrl = "https://file-examples.com/wp-content/uploads/2017/04/file_example_MP4_480_1_5MG.mp4" + val fileUrl = "https://sunbirddevbbpublic.blob.core.windows.net/sunbird-content-staging/content/assets/do_2137327580080128001217/gateway-of-india.jpg" val httpUtil = new HttpUtil val downloadPath = "/tmp/content" + File.separator + "_temp_" + System.currentTimeMillis val downloadedFile = httpUtil.downloadFile(fileUrl, downloadPath) From 495bc83e29aefe175c581535f2393e7a6b1d187c Mon Sep 17 00:00:00 2001 From: Kumar Gauraw Date: Wed, 9 Aug 2023 17:26:05 +0530 Subject: [PATCH 6/6] Issue #IQ-524 fix: updated build script --- kubernetes/pipelines/build/Jenkinsfile | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kubernetes/pipelines/build/Jenkinsfile b/kubernetes/pipelines/build/Jenkinsfile index dc932cab0..81cd18386 100644 --- a/kubernetes/pipelines/build/Jenkinsfile +++ b/kubernetes/pipelines/build/Jenkinsfile @@ -21,13 +21,15 @@ node('build-slave') { commit_hash = sh(script: 'git rev-parse --short HEAD', returnStdout: true).trim() build_tag = sh(script: "echo " + params.inquiry_release_tag.split('/')[-1] + "_" + commit_hash + "_" + env.BUILD_NUMBER, returnStdout: true).trim() echo "build_tag: " + build_tag + cloud_store_group_id = params.CLOUD_STORE_GROUP_ID + cloud_store_artifact_id = params.CLOUD_STORE_ARTIFACT_ID + cloud_store_version = params.CLOUD_STORE_VERSION stage('Build') { env.NODE_ENV = "build" print "Environment will be : ${env.NODE_ENV}" sh '/opt/apache-maven-3.6.3/bin/mvn3.6 -v' - sh '/opt/apache-maven-3.6.3/bin/mvn3.6 clean install -DskipTests -DCLOUD_STORE_GROUP_ID=${params.CLOUD_STORE_GROUP_ID} -DCLOUD_STORE_ARTIFACT_ID=${params.CLOUD_STORE_ARTIFACT_ID} -DCLOUD_STORE_VERSION=${params.CLOUD_STORE_VERSION}' - + sh '/opt/apache-maven-3.6.3/bin/mvn3.6 clean install -DskipTests -DCLOUD_STORE_GROUP_ID=' + cloud_store_group_id + ' -DCLOUD_STORE_ARTIFACT_ID=' + cloud_store_artifact_id + ' -DCLOUD_STORE_VERSION=' + 
cloud_store_version } stage('Package') {