Skip to content

Commit

Permalink
Merge branch 'master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
kanavnarula authored Oct 22, 2024
2 parents 332015f + cdb23ac commit 35847e7
Show file tree
Hide file tree
Showing 745 changed files with 30,693 additions and 18,105 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/airflow-plugin.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ jobs:
- python-version: "3.11"
extra_pip_requirements: "apache-airflow~=2.9.3 -c https://raw.githubusercontent.com/apache/airflow/constraints-2.9.3/constraints-3.11.txt"
extra_pip_extras: plugin-v2
- python-version: "3.11"
extra_pip_requirements: "apache-airflow~=2.10.2 -c https://raw.githubusercontent.com/apache/airflow/constraints-2.10.2/constraints-3.11.txt"
extra_pip_extras: plugin-v2
fail-fast: false
steps:
- name: Set up JDK 17
Expand Down
9 changes: 4 additions & 5 deletions .github/workflows/build-and-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,17 +67,16 @@ jobs:
timezoneLinux: ${{ matrix.timezone }}
- name: Check out the repo
uses: acryldata/sane-checkout-action@v3
- uses: actions/setup-python@v5
with:
python-version: "3.10"
cache: pip
- name: Set up JDK 17
uses: actions/setup-java@v4
with:
distribution: "zulu"
java-version: 17
- uses: gradle/actions/setup-gradle@v3
- uses: actions/setup-python@v5
if: ${{ needs.setup.outputs.ingestion_change == 'true' }}
with:
python-version: "3.10"
cache: pip
- name: Gradle build (and test) for NOT metadata ingestion
if: ${{ matrix.command == 'except_metadata_ingestion' && needs.setup.outputs.backend_change == 'true' }}
run: |
Expand Down
42 changes: 42 additions & 0 deletions .github/workflows/contributor-open-pr-comment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Posts a welcome comment on every newly opened pull request, linking the
# author's contributor card on contributors.datahubproject.io.
#
# NOTE(review): for pull requests opened from forks, GitHub issues a read-only
# GITHUB_TOKEN on `pull_request` events regardless of the `permissions` block,
# so the createComment call below is expected to fail for those PRs. Confirm
# whether fork PRs must be covered (e.g. via `pull_request_target`).
name: PR Comment

on:
  pull_request:
    types: [opened]

permissions:
  pull-requests: write

jobs:
  post-pr-opened-comment:
    runs-on: ubuntu-latest
    steps:
      # No checkout step: nothing below reads the working tree.

      - name: Get and Format Username (PR only)
        if: github.event_name == 'pull_request'
        env:
          # Pass the login through the environment instead of interpolating
          # `${{ }}` into the script text, so event data is never parsed as
          # shell source.
          PR_USER_LOGIN: ${{ github.event.pull_request.user.login }}
        run: |
          # Lower-case the login and turn spaces into hyphens.
          formatted_username=$(printf '%s\n' "$PR_USER_LOGIN" | tr '[:upper:]' '[:lower:]' | sed 's/ /-/g')
          echo "FORMATTED_USERNAME=$formatted_username" >> "$GITHUB_ENV"

      - name: Create Comment (PR only)
        if: github.event_name == 'pull_request'
        uses: actions/github-script@v6
        env:
          # Raw (un-normalised) login, used for the contributor image URL.
          PR_USER_LOGIN: ${{ github.event.pull_request.user.login }}
        with:
          script: |
            if (context.payload.pull_request) {
              // FORMATTED_USERNAME is exported by the previous step via GITHUB_ENV.
              const prUser = process.env.FORMATTED_USERNAME;
              // URL-encode the raw login so it is always a valid query value.
              const userId = encodeURIComponent(process.env.PR_USER_LOGIN);
              const url = `https://contributors.datahubproject.io/${prUser}`;
              const body = `Hello @${prUser} :smile: \n\n Thank you so much for opening a pull request!\n\n![Image](https://contributors.datahubproject.io/api/og?userId=${userId})\nYou can check out your contributor card and see all your past stats [here](${url})!`;
              // Create a comment on the PR
              await github.rest.issues.createComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: context.payload.pull_request.number,
                body: body
              });
            } else {
              console.log('Not a pull request event.');
            }
157 changes: 142 additions & 15 deletions .github/workflows/docker-unified.yml
Original file line number Diff line number Diff line change
Expand Up @@ -186,9 +186,11 @@ jobs:
with:
image: ${{ env.DATAHUB_GMS_IMAGE }}:${{ needs.setup.outputs.unique_tag }}
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@0.8.0
uses: aquasecurity/trivy-action@0.26.0
env:
TRIVY_OFFLINE_SCAN: true
TRIVY_DB_REPOSITORY: public.ecr.aws/aquasecurity/trivy-db:2,ghcr.io/aquasecurity/trivy-db:2
TRIVY_JAVA_DB_REPOSITORY: public.ecr.aws/aquasecurity/trivy-java-db:1,ghcr.io/aquasecurity/trivy-java-db:1
with:
image-ref: ${{ env.DATAHUB_GMS_IMAGE }}:${{ needs.setup.outputs.unique_tag }}
format: "template"
Expand Down Expand Up @@ -250,9 +252,11 @@ jobs:
with:
image: ${{ env.DATAHUB_MAE_CONSUMER_IMAGE }}:${{ needs.setup.outputs.unique_tag }}
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@0.8.0
uses: aquasecurity/trivy-action@0.26.0
env:
TRIVY_OFFLINE_SCAN: true
TRIVY_DB_REPOSITORY: public.ecr.aws/aquasecurity/trivy-db:2,ghcr.io/aquasecurity/trivy-db:2
TRIVY_JAVA_DB_REPOSITORY: public.ecr.aws/aquasecurity/trivy-java-db:1,ghcr.io/aquasecurity/trivy-java-db:1
with:
image-ref: ${{ env.DATAHUB_MAE_CONSUMER_IMAGE }}:${{ needs.setup.outputs.unique_tag }}
format: "template"
Expand Down Expand Up @@ -314,9 +318,11 @@ jobs:
with:
image: ${{ env.DATAHUB_MCE_CONSUMER_IMAGE }}:${{ needs.setup.outputs.unique_tag }}
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@0.8.0
uses: aquasecurity/trivy-action@0.26.0
env:
TRIVY_OFFLINE_SCAN: true
TRIVY_DB_REPOSITORY: public.ecr.aws/aquasecurity/trivy-db:2,ghcr.io/aquasecurity/trivy-db:2
TRIVY_JAVA_DB_REPOSITORY: public.ecr.aws/aquasecurity/trivy-java-db:1,ghcr.io/aquasecurity/trivy-java-db:1
with:
image-ref: ${{ env.DATAHUB_MCE_CONSUMER_IMAGE }}:${{ needs.setup.outputs.unique_tag }}
format: "template"
Expand Down Expand Up @@ -378,9 +384,11 @@ jobs:
with:
image: ${{ env.DATAHUB_UPGRADE_IMAGE }}:${{ needs.setup.outputs.unique_tag }}
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@0.8.0
uses: aquasecurity/trivy-action@0.26.0
env:
TRIVY_OFFLINE_SCAN: true
TRIVY_DB_REPOSITORY: public.ecr.aws/aquasecurity/trivy-db:2,ghcr.io/aquasecurity/trivy-db:2
TRIVY_JAVA_DB_REPOSITORY: public.ecr.aws/aquasecurity/trivy-java-db:1,ghcr.io/aquasecurity/trivy-java-db:1
with:
image-ref: ${{ env.DATAHUB_UPGRADE_IMAGE }}:${{ needs.setup.outputs.unique_tag }}
format: "template"
Expand Down Expand Up @@ -444,9 +452,11 @@ jobs:
with:
image: ${{ env.DATAHUB_FRONTEND_IMAGE }}:${{ needs.setup.outputs.unique_tag }}
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@0.8.0
uses: aquasecurity/trivy-action@0.26.0
env:
TRIVY_OFFLINE_SCAN: true
TRIVY_DB_REPOSITORY: public.ecr.aws/aquasecurity/trivy-db:2,ghcr.io/aquasecurity/trivy-db:2
TRIVY_JAVA_DB_REPOSITORY: public.ecr.aws/aquasecurity/trivy-java-db:1,ghcr.io/aquasecurity/trivy-java-db:1
with:
image-ref: ${{ env.DATAHUB_FRONTEND_IMAGE }}:${{ needs.setup.outputs.unique_tag }}
format: "template"
Expand Down Expand Up @@ -480,6 +490,41 @@ jobs:
context: .
file: ./docker/kafka-setup/Dockerfile
platforms: linux/amd64,linux/arm64/v8
# Scans the Kafka Setup image with Trivy and uploads the resulting SARIF
# report to the GitHub Security tab. Runs after `setup` and
# `kafka_setup_build`.
kafka_setup_scan:
  permissions:
    contents: read # for actions/checkout to fetch code
    security-events: write # for github/codeql-action/upload-sarif to upload SARIF results
    actions: read # only required for a private repository by github/codeql-action/upload-sarif to get the Action run status
  name: "[Monitoring] Scan Kafka Setup images for vulnerabilities"
  runs-on: ubuntu-latest
  needs: [ setup, kafka_setup_build ]
  if: ${{ needs.setup.outputs.kafka_setup_change == 'true' || (needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true') }}
  steps:
    - name: Checkout # adding checkout step just to make trivy upload happy
      uses: acryldata/sane-checkout-action@v3
    # The artifact download is skipped when either publish flag is 'true'.
    - name: Download image
      uses: ishworkh/docker-image-artifact-download@v1
      if: ${{ needs.setup.outputs.publish != 'true' && needs.setup.outputs.pr-publish != 'true' }}
      with:
        image: ${{ env.DATAHUB_KAFKA_SETUP_IMAGE }}:${{ needs.setup.outputs.unique_tag }}
    - name: Run Trivy vulnerability scanner
      # Same action version as the other Trivy scan jobs in this workflow.
      uses: aquasecurity/trivy-action@0.26.0
      env:
        TRIVY_OFFLINE_SCAN: true
        # Two DB registries are listed for each database: public ECR first, then GHCR.
        TRIVY_DB_REPOSITORY: public.ecr.aws/aquasecurity/trivy-db:2,ghcr.io/aquasecurity/trivy-db:2
        TRIVY_JAVA_DB_REPOSITORY: public.ecr.aws/aquasecurity/trivy-java-db:1,ghcr.io/aquasecurity/trivy-java-db:1
      with:
        image-ref: ${{ env.DATAHUB_KAFKA_SETUP_IMAGE }}:${{ needs.setup.outputs.unique_tag }}
        format: "template"
        template: "@/contrib/sarif.tpl"
        # Must match `sarif_file` in the upload step below.
        output: "trivy-results.sarif"
        severity: "CRITICAL,HIGH"
        ignore-unfixed: true
        vuln-type: "os,library"
    - name: Upload Trivy scan results to GitHub Security tab
      uses: github/codeql-action/upload-sarif@v2
      with:
        sarif_file: "trivy-results.sarif"

mysql_setup_build:
name: Build and Push DataHub MySQL Setup Docker Image
Expand All @@ -501,6 +546,41 @@ jobs:
context: .
file: ./docker/mysql-setup/Dockerfile
platforms: linux/amd64,linux/arm64/v8
# Scans the MySQL Setup image with Trivy and uploads the resulting SARIF
# report to the GitHub Security tab. Runs after `setup` and
# `mysql_setup_build`.
mysql_setup_scan:
  permissions:
    contents: read # for actions/checkout to fetch code
    security-events: write # for github/codeql-action/upload-sarif to upload SARIF results
    actions: read # only required for a private repository by github/codeql-action/upload-sarif to get the Action run status
  name: "[Monitoring] Scan MySQL Setup images for vulnerabilities"
  runs-on: ubuntu-latest
  needs: [ setup, mysql_setup_build ]
  if: ${{ needs.setup.outputs.mysql_setup_change == 'true' || (needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true') }}
  steps:
    - name: Checkout # adding checkout step just to make trivy upload happy
      uses: acryldata/sane-checkout-action@v3
    # The artifact download is skipped when either publish flag is 'true'.
    - name: Download image
      uses: ishworkh/docker-image-artifact-download@v1
      if: ${{ needs.setup.outputs.publish != 'true' && needs.setup.outputs.pr-publish != 'true' }}
      with:
        image: ${{ env.DATAHUB_MYSQL_SETUP_IMAGE }}:${{ needs.setup.outputs.unique_tag }}
    - name: Run Trivy vulnerability scanner
      # Same action version as the other Trivy scan jobs in this workflow.
      uses: aquasecurity/trivy-action@0.26.0
      env:
        TRIVY_OFFLINE_SCAN: true
        # Two DB registries are listed for each database: public ECR first, then GHCR.
        TRIVY_DB_REPOSITORY: public.ecr.aws/aquasecurity/trivy-db:2,ghcr.io/aquasecurity/trivy-db:2
        TRIVY_JAVA_DB_REPOSITORY: public.ecr.aws/aquasecurity/trivy-java-db:1,ghcr.io/aquasecurity/trivy-java-db:1
      with:
        image-ref: ${{ env.DATAHUB_MYSQL_SETUP_IMAGE }}:${{ needs.setup.outputs.unique_tag }}
        format: "template"
        template: "@/contrib/sarif.tpl"
        # Must match `sarif_file` in the upload step below.
        output: "trivy-results.sarif"
        severity: "CRITICAL,HIGH"
        ignore-unfixed: true
        vuln-type: "os,library"
    - name: Upload Trivy scan results to GitHub Security tab
      uses: github/codeql-action/upload-sarif@v2
      with:
        sarif_file: "trivy-results.sarif"

elasticsearch_setup_build:
name: Build and Push DataHub Elasticsearch Setup Docker Image
Expand All @@ -522,6 +602,41 @@ jobs:
context: .
file: ./docker/elasticsearch-setup/Dockerfile
platforms: linux/amd64,linux/arm64/v8
# Scans the Elasticsearch Setup image with Trivy and uploads the resulting
# SARIF report to the GitHub Security tab. Runs after `setup` and
# `elasticsearch_setup_build`.
elasticsearch_setup_scan:
  permissions:
    contents: read # for actions/checkout to fetch code
    security-events: write # for github/codeql-action/upload-sarif to upload SARIF results
    actions: read # only required for a private repository by github/codeql-action/upload-sarif to get the Action run status
  name: "[Monitoring] Scan ElasticSearch setup images for vulnerabilities"
  runs-on: ubuntu-latest
  needs: [ setup, elasticsearch_setup_build ]
  if: ${{ needs.setup.outputs.elasticsearch_setup_change == 'true' || (needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' ) }}
  steps:
    - name: Checkout # adding checkout step just to make trivy upload happy
      uses: acryldata/sane-checkout-action@v3
    # The artifact download is skipped when either publish flag is 'true'.
    - name: Download image
      uses: ishworkh/docker-image-artifact-download@v1
      if: ${{ needs.setup.outputs.publish != 'true' && needs.setup.outputs.pr-publish != 'true' }}
      with:
        image: ${{ env.DATAHUB_ELASTIC_SETUP_IMAGE }}:${{ needs.setup.outputs.unique_tag }}
    - name: Run Trivy vulnerability scanner
      # Same action version as the other Trivy scan jobs in this workflow.
      uses: aquasecurity/trivy-action@0.26.0
      env:
        TRIVY_OFFLINE_SCAN: true
        # Two DB registries are listed for each database: public ECR first, then GHCR.
        TRIVY_DB_REPOSITORY: public.ecr.aws/aquasecurity/trivy-db:2,ghcr.io/aquasecurity/trivy-db:2
        TRIVY_JAVA_DB_REPOSITORY: public.ecr.aws/aquasecurity/trivy-java-db:1,ghcr.io/aquasecurity/trivy-java-db:1
      with:
        image-ref: ${{ env.DATAHUB_ELASTIC_SETUP_IMAGE }}:${{ needs.setup.outputs.unique_tag }}
        format: "template"
        template: "@/contrib/sarif.tpl"
        # Must match `sarif_file` in the upload step below.
        output: "trivy-results.sarif"
        severity: "CRITICAL,HIGH"
        ignore-unfixed: true
        vuln-type: "os,library"
    - name: Upload Trivy scan results to GitHub Security tab
      uses: github/codeql-action/upload-sarif@v2
      with:
        sarif_file: "trivy-results.sarif"

datahub_ingestion_base_build:
name: Build and Push DataHub Ingestion (Base) Docker Image
Expand Down Expand Up @@ -645,14 +760,18 @@ jobs:
needs: [setup, datahub_ingestion_base_slim_build]
if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }}
steps:
- name: Check out the repo
uses: acryldata/sane-checkout-action@v3
- uses: actions/setup-python@v5
with:
python-version: "3.10"
cache: "pip"
- name: Set up JDK 17
uses: actions/setup-java@v4
with:
distribution: "zulu"
java-version: 17
- uses: gradle/actions/setup-gradle@v3
- name: Check out the repo
uses: acryldata/sane-checkout-action@v3
- name: Build codegen
if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish =='true' }}
run: ./gradlew :metadata-ingestion:codegen
Expand Down Expand Up @@ -709,9 +828,11 @@ jobs:
with:
image: ${{ env.DATAHUB_INGESTION_IMAGE }}:${{ needs.datahub_ingestion_slim_build.outputs.tag }}
- name: Run Trivy vulnerability scanner Slim Image
uses: aquasecurity/trivy-action@0.8.0
uses: aquasecurity/trivy-action@0.26.0
env:
TRIVY_OFFLINE_SCAN: true
TRIVY_DB_REPOSITORY: public.ecr.aws/aquasecurity/trivy-db:2,ghcr.io/aquasecurity/trivy-db:2
TRIVY_JAVA_DB_REPOSITORY: public.ecr.aws/aquasecurity/trivy-java-db:1,ghcr.io/aquasecurity/trivy-java-db:1
with:
image-ref: ${{ env.DATAHUB_INGESTION_IMAGE }}:${{ needs.datahub_ingestion_slim_build.outputs.tag }}
format: "template"
Expand All @@ -735,14 +856,18 @@ jobs:
needs: [setup, datahub_ingestion_base_full_build]
if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }}
steps:
- name: Check out the repo
uses: acryldata/sane-checkout-action@v3
- uses: actions/setup-python@v5
with:
python-version: "3.10"
cache: "pip"
- name: Set up JDK 17
uses: actions/setup-java@v4
with:
distribution: "zulu"
java-version: 17
- uses: gradle/actions/setup-gradle@v3
- name: Check out the repo
uses: acryldata/sane-checkout-action@v3
- name: Build codegen
if: ${{ needs.setup.outputs.ingestion_change == 'true' || needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }}
run: ./gradlew :metadata-ingestion:codegen
Expand Down Expand Up @@ -797,9 +922,11 @@ jobs:
with:
image: ${{ env.DATAHUB_INGESTION_IMAGE }}:${{ needs.datahub_ingestion_full_build.outputs.tag }}
- name: Run Trivy vulnerability scanner Full Image
uses: aquasecurity/trivy-action@0.8.0
uses: aquasecurity/trivy-action@0.26.0
env:
TRIVY_OFFLINE_SCAN: true
TRIVY_DB_REPOSITORY: public.ecr.aws/aquasecurity/trivy-db:2,ghcr.io/aquasecurity/trivy-db:2
TRIVY_JAVA_DB_REPOSITORY: public.ecr.aws/aquasecurity/trivy-java-db:1,ghcr.io/aquasecurity/trivy-java-db:1
with:
image-ref: ${{ env.DATAHUB_INGESTION_IMAGE }}:${{ needs.datahub_ingestion_full_build.outputs.tag }}
format: "template"
Expand Down Expand Up @@ -864,16 +991,16 @@ jobs:
run: df -h . && docker images
- name: Check out the repo
uses: acryldata/sane-checkout-action@v3
- uses: actions/setup-python@v5
with:
python-version: "3.10"
cache: "pip"
- name: Set up JDK 17
uses: actions/setup-java@v4
with:
distribution: "zulu"
java-version: 17
- uses: gradle/actions/setup-gradle@v3
- uses: actions/setup-python@v5
with:
python-version: "3.10"
cache: "pip"
- name: Login to DockerHub
uses: docker/login-action@v3
if: ${{ needs.setup.outputs.docker-login == 'true' }}
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/metadata-ingestion.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ concurrency:
jobs:
metadata-ingestion:
runs-on: ubuntu-latest
timeout-minutes: 40
env:
SPARK_VERSION: 3.3.2
DATAHUB_TELEMETRY_ENABLED: false
Expand Down
Loading

0 comments on commit 35847e7

Please sign in to comment.