Skip to content

Commit

Permalink
Add support for deploying airflow (#132)
Browse files Browse the repository at this point in the history
It's using our own fork of this idealista role available here:
https://github.com/ooni/airflow-role.

We should probably at some point upstream the changes.
  • Loading branch information
hellais authored Dec 20, 2024
1 parent cd8f0c6 commit f87f1ce
Show file tree
Hide file tree
Showing 14 changed files with 209 additions and 37 deletions.
9 changes: 9 additions & 0 deletions ansible/deploy-airflow.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
# Playbook: applies the oonidata_airflow role to the single host listed below.
- name: Deploy airflow frontend host
  hosts:
    - data1.htz-fsn.prod.ooni.nu
  become: true
  roles:
    - oonidata_airflow
  vars:
    # Public hostname of the Airflow web UI. The role uses it to build the
    # webserver base URL and as the nginx server_name.
    airflow_public_fqdn: "airflow.prod.ooni.io"
14 changes: 2 additions & 12 deletions ansible/deploy-tier0.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,5 @@
- name: Include clickhouse playbook
ansible.builtin.import_playbook: deploy-clickhouse.yml

- name: Deploy oonidata worker nodes
hosts:
- data1.htz-fsn.prod.ooni.nu
become: true
tags:
- oonidata_worker
roles:
- oonidata
vars:
enable_jupyterhub: false
enable_oonipipeline_worker: true
clickhouse_url: "clickhouse://write:{{ lookup('amazon.aws.aws_ssm', '/oonidevops/secrets/clickhouse_write_password', profile='oonidevops_user_prod') | hash('sha256') }}@clickhouse1.prod.ooni.io/ooni"
- name: Include airflow playbook
ansible.builtin.import_playbook: deploy-airflow.yml
13 changes: 13 additions & 0 deletions ansible/group_vars/airflow/vars.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
---
# Variables for the Airflow deployment.
# Every secret below is looked up at run time from AWS SSM using the
# `oonidevops_user_prod` profile; no secret value is stored in this file.

# Admin account(s) handed to the airflow role.
airflow_admin_users:
  - name: OONI Admin
    username: admin
    password: "{{ lookup('amazon.aws.aws_ssm', '/oonidevops/secrets/airflow_admin_password', profile='oonidevops_user_prod') }}"
    role: Admin
    firstname: Open
    lastname: Observatory
    # NOTE(review): this value looks redacted/obfuscated in the reviewed copy;
    # confirm the real address is present here and quote it.
    email: [email protected]

airflow_fernet_key: "{{ lookup('amazon.aws.aws_ssm', '/oonidevops/secrets/airflow_fernet_key', profile='oonidevops_user_prod') }}"

# Defined exactly once: a second, identical definition of this key used to
# follow `airflow_executor`. Duplicate mapping keys are invalid YAML and are
# only tolerated because most parsers silently keep the last one.
airflow_webserver_secret_key: "{{ lookup('amazon.aws.aws_ssm', '/oonidevops/secrets/airflow_webserver_secret_key', profile='oonidevops_user_prod') }}"

airflow_executor: "LocalExecutor"

# SQLAlchemy-style connection URL for the Airflow metadata database (RDS).
airflow_database_conn: "postgresql+psycopg2://airflow:{{ lookup('amazon.aws.aws_ssm', '/oonidevops/secrets/airflow_postgresql_password', profile='oonidevops_user_prod') }}@ooni-tier0-postgres.c7mgscca82no.eu-central-1.rds.amazonaws.com/airflow"
3 changes: 3 additions & 0 deletions ansible/inventory
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ data1.htz-fsn.prod.ooni.nu
data2.htz-fsn.prod.ooni.nu
data3.htz-fsn.prod.ooni.nu

[airflow]
data1.htz-fsn.prod.ooni.nu

## Location tags

[htz_fsn]
Expand Down
7 changes: 7 additions & 0 deletions ansible/requirements.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,15 @@
# Roles resolved by name, each pinned to a release.
- src: willshersystems.sshd
  version: "v0.25.0"
- src: nginxinc.nginx
  version: "0.24.3"
- src: geerlingguy.certbot
  version: "5.2.0"
- src: artis3n.tailscale
  version: "v4.5.0"

# Roles fetched directly from git repositories.
- name: idealista.clickhouse_role
  src: https://github.com/idealista/clickhouse_role
  scm: git
  version: "3.5.1"
# NOTE(review): no `version` is set for this role, so the installed revision
# is whatever the repository's default ref points at when roles are
# installed. Consider pinning a tag or commit.
- name: ooni.airflow_role
  src: https://github.com/ooni/airflow-role.git
  scm: git
2 changes: 1 addition & 1 deletion ansible/roles/dehydrated/meta/main.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
---
dependencies:
- nginx-buster
- nginx
...

8 changes: 0 additions & 8 deletions ansible/roles/dehydrated/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,6 @@
tags: dehydrated
shell: systemctl reload nginx.service

- name: allow incoming TCP connections to Nginx on port 80
tags: dehydrated
blockinfile:
path: /etc/ooni/nftables/tcp/80.nft
create: yes
block: |
add rule inet filter input tcp dport 80 counter accept comment "incoming HTTP"
- name: reload nftables service
tags: dehydrated
shell: systemctl reload nftables.service
Expand Down
10 changes: 0 additions & 10 deletions ansible/roles/nginx/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,13 +51,3 @@
notify: reload nginx
tags:
- nginx

- name: create config dir
ansible.builtin.file:
path: /etc/ooni/nftables/tcp
state: directory
owner: root
group: root
mode: 0755
tags:
- nftables
25 changes: 25 additions & 0 deletions ansible/roles/oonidata_airflow/Readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
## Airflow role deployment notes

There are a few pieces that are dependencies to this role running properly that
you will have to do manually:

* Set up the PostgreSQL database and create the relevant DB and account.

Be sure to give correct permissions to the airflow user. Here is a relevant snippet:
```
CREATE DATABASE airflow;
CREATE ROLE airflow WITH PASSWORD '' LOGIN;
GRANT ALL PRIVILEGES ON DATABASE airflow TO airflow;
GRANT ALL ON SCHEMA public TO airflow;
```

* For some reason the admin account creation is failing. This is likely a bug
in the upstream role. During the last deploy this was addressed by logging
into the host and running the create task manually:
```
AIRFLOW_CONFIG=/etc/airflow/airflow.cfg AIRFLOW_HOME=/opt/airflow/ /opt/airflow/bin/airflow users create --username admin --password XXX --firstname Open --lastname Observatory --role Admin --email [email protected]
```

* Once the setup is complete, you will then have to log in to the Airflow web
UI using the admin user, go into Admin->Variables and add the `clickhouse_url`
variable
2 changes: 2 additions & 0 deletions ansible/roles/oonidata_airflow/defaults/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
---
# Default variables for the oonidata_airflow role.

# Base directory of the TLS certificates. The nginx-airflow.j2 template reads
# fullchain.pem, privkey.pem and chain.pem from a sub-directory named after
# the inventory hostname.
tls_cert_dir: /var/lib/dehydrated/certs

# NOTE(review): not referenced by this role's tasks or template; presumably
# consumed by a certificate role. Verify before removing.
certbot_domains_extra: []
4 changes: 4 additions & 0 deletions ansible/roles/oonidata_airflow/handlers/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Notified by the "Setup airflow nginx config" task when the rendered vhost
# file changes.
- name: Reload nginx
  ansible.builtin.systemd_service:
    state: reloaded
    name: nginx
89 changes: 89 additions & 0 deletions ansible/roles/oonidata_airflow/tasks/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# Make sure the `airflow` group exists before anything else runs.
# Uses the fully-qualified module name, like every other task in this file.
- name: Ensure Airflow group
  ansible.builtin.group:
    name: "airflow"
  become: true

# TODO: uncomment this section if you want to redeploy it
# this was added after the user had already been created by the airflow_role
# and so it's failing because it's trying to modify the user.
#- name: Ensure Airflow user
# user:
# name: "airflow"
# group: "airflow"
# system: true
# shell: "/usr/sbin/nologin"
# createhome: "yes"
# home: "/opt/airflow"
# become: true

# Clone ooni/data as the airflow user and check out the `airflow` ref.
# The DAGs folder configured for Airflow lives inside this checkout.
- name: Checkout oonidata repo
  become_user: airflow
  ansible.builtin.git:
    dest: /opt/airflow/oonidata
    repo: 'https://github.com/ooni/data.git'
    version: airflow

# Run the forked airflow role with the settings for this deployment.
# The task was previously unnamed, which makes play output and
# --start-at-task hard to use.
- name: Install and configure Airflow
  ansible.builtin.include_role:
    name: ooni.airflow_role
  tags:
    - oonidata
    - airflow
  vars:
    airflow_app_home: /opt/airflow
    # DAGs are read from the oonidata checkout made by the previous task.
    airflow_dags_folder: /opt/airflow/oonidata/dags/
    # The webserver listens on loopback only; nginx proxies to
    # 127.0.0.1:8080 (see templates/nginx-airflow.j2).
    airflow_webserver_host: "127.0.0.1"
    airflow_webserver_port: 8080
    airflow_webserver_base_url: "https://{{ airflow_public_fqdn }}"
    airflow_environment_extra_vars:
      - name: AIRFLOW_VAR_DATA_DIR
        value: "{{ airflow_app_home }}/data_dir"
    airflow_extra_packages:
      - postgres
      - virtualenv
    # Only the webserver and the scheduler are declared here.
    airflow_services:
      airflow_webserver:
        service_name: airflow-webserver
        enabled: true
        running: true
        state: started
        path: airflow-webserver.service.j2
      airflow_scheduler:
        service_name: airflow-scheduler
        enabled: true
        running: true
        state: started
        path: airflow-scheduler.service.j2

# Ensure the oonidata checkout is owned by airflow and world-readable.
# The path must match the `dest` of the "Checkout oonidata repo" task; the
# previous value, /opt/oonidata, pointed at a directory nothing else in this
# role uses, and `state: directory` would simply create it empty.
- name: Set correct permissions on oonidata repo dir
  ansible.builtin.file:
    path: /opt/airflow/oonidata
    state: directory
    owner: airflow
    # Symbolic mode with capital X: directories become traversable, while
    # regular files only keep an execute bit they already had. A recursive
    # numeric 0755 would mark every tracked file executable and leave the git
    # checkout with local modifications.
    mode: "u=rwX,g=rX,o=rX"
    recurse: true

# Pull in the nginx role; the vhost for Airflow is installed by a later task.
# The task was previously unnamed.
- name: Install nginx
  ansible.builtin.include_role:
    name: nginx
  tags:
    - oonidata
    - nginx

# Run the dehydrated role with a domain list made of the inventory hostname
# followed by the public Airflow FQDN. The nginx template expects the
# certificate files in a directory named after the inventory hostname.
# The task was previously unnamed.
- name: Obtain TLS certificates with dehydrated
  ansible.builtin.include_role:
    name: dehydrated
  tags:
    - oonidata
    - dehydrated
  vars:
    ssl_domains: "{{ [ inventory_hostname ] + [ airflow_public_fqdn ] }}"

# Render the reverse-proxy vhost for the Airflow web UI and reload nginx when
# the rendered file changes.
- name: Setup airflow nginx config
  ansible.builtin.template:
    src: nginx-airflow.j2
    dest: /etc/nginx/sites-enabled/02-airflow
    owner: root
    # Plain config file: read/write for root, read-only for everyone else.
    # The previous "0655" gave group/other an execute bit the owner lacked.
    mode: "0644"
  notify:
    - Reload nginx
  tags:
    - oonidata
    - config
40 changes: 40 additions & 0 deletions ansible/roles/oonidata_airflow/templates/nginx-airflow.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# ansible-managed in ooni/devops.git

# Value for the upstream Connection header: "upgrade" when the client sent an
# Upgrade header, "close" when that header is empty.
map $http_upgrade $connection_upgrade {
    default upgrade;
    ''      close;
}

# HTTPS virtual host for the public Airflow hostname.
server {
    listen 443 ssl http2;

    include /etc/nginx/ssl_intermediate.conf;

    # Certificate files live in a directory named after the inventory
    # hostname, not after the public FQDN.
    ssl_certificate         {{ tls_cert_dir }}/{{ inventory_hostname }}/fullchain.pem;
    ssl_certificate_key     {{ tls_cert_dir }}/{{ inventory_hostname }}/privkey.pem;
    ssl_trusted_certificate {{ tls_cert_dir }}/{{ inventory_hostname }}/chain.pem;

    server_name {{ airflow_public_fqdn }};
    access_log  /var/log/nginx/{{ airflow_public_fqdn }}.access.log;
    error_log   /var/log/nginx/{{ airflow_public_fqdn }}.log warn;

    # NOTE(review): wildcard CORS in front of an authenticated admin UI;
    # confirm this is intended.
    add_header Access-Control-Allow-Origin *;

    ## Airflow reverse proxy
    location / {
        # Airflow webserver listening on loopback.
        proxy_pass http://127.0.0.1:8080;

        proxy_set_header X-Real-IP       $remote_addr;
        proxy_set_header Host            $host;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;

        client_max_body_size 100M;

        # WebSocket support
        proxy_http_version 1.1;
        proxy_set_header Upgrade    $http_upgrade;
        proxy_set_header Connection $connection_upgrade;
        proxy_set_header X-Scheme   $scheme;
        proxy_buffering off;
    }
}
20 changes: 14 additions & 6 deletions tf/environments/prod/dns_records.tf
Original file line number Diff line number Diff line change
Expand Up @@ -979,45 +979,53 @@ resource "aws_route53_record" "data1-htz-fsn-prod-ooni-nu-_a_" {
records = ["142.132.254.225"]
ttl = "60"
type = "A"
zone_id = local.dns_root_zone_ooni_nu
zone_id = local.dns_zone_ooni_nu
}

resource "aws_route53_record" "data2-htz-fsn-prod-ooni-nu-_A_" {
name = "data2.htz-fsn.prod.ooni.nu"
records = ["88.198.54.12"]
ttl = "60"
type = "A"
zone_id = local.dns_root_zone_ooni_nu
zone_id = local.dns_zone_ooni_nu
}

resource "aws_route53_record" "data3-htz-fsn-prod-ooni-nu-_A_" {
name = "data3.htz-fsn.prod.ooni.nu"
records = ["168.119.7.188"]
ttl = "60"
type = "A"
zone_id = local.dns_root_zone_ooni_nu
zone_id = local.dns_zone_ooni_nu
}

resource "aws_route53_record" "clickhouse1-prod-ooni-io-_a_" {
name = "clickhouse1.prod.ooni.io"
records = ["142.132.254.225"]
ttl = "60"
type = "A"
zone_id = local.dns_root_zone_ooni_io
zone_id = local.dns_zone_ooni_io
}

resource "aws_route53_record" "clickhouse2-prod-ooni-io-_A_" {
name = "clickhouse2.prod.ooni.io"
records = ["88.198.54.12"]
ttl = "60"
type = "A"
zone_id = local.dns_root_zone_ooni_io
zone_id = local.dns_zone_ooni_io
}

resource "aws_route53_record" "clickhouse3-prod-ooni-io-_A_" {
name = "clickhouse3.prod.ooni.io"
records = ["168.119.7.188"]
ttl = "60"
type = "A"
zone_id = local.dns_root_zone_ooni_io
zone_id = local.dns_zone_ooni_io
}

# A record for the public Airflow hostname. It points at the same address as
# the clickhouse1.prod.ooni.io record defined earlier in this file.
resource "aws_route53_record" "airflow-prod-ooni-io-_a_" {
  zone_id = local.dns_zone_ooni_io
  name    = "airflow.prod.ooni.io"
  type    = "A"
  ttl     = "60"
  records = ["142.132.254.225"]
}

0 comments on commit f87f1ce

Please sign in to comment.