diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..7e99e36
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+*.pyc
diff --git a/airflow-envvars-configmap.yaml b/airflow-envvars-configmap.yaml
new file mode 100644
index 0000000..88b26dd
--- /dev/null
+++ b/airflow-envvars-configmap.yaml
@@ -0,0 +1,21 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: airflow-envvars-configmap
+data:
+  EXECUTOR: Kubernetes
+  POSTGRES_HOST: postgres
+  POSTGRES_USER: airflow
+  # NOTE(review): plaintext credentials in a ConfigMap are only acceptable for a local demo; use a Secret otherwise.
+  POSTGRES_PASSWORD: airflow
+  POSTGRES_DB: airflow
+  POSTGRES_PORT: "5432"
+  LOAD_EX: "y"
+  # The setting below is necessary because of a typo in the airflow.cfg shipped with the docker-airflow image:
+  # https://github.com/puckel/docker-airflow/blob/bed777970caa3e555ef618d84be07404438c27e3/config/airflow.cfg#L934
+  AIRFLOW__KUBERNETES__KUBE_CLIENT_REQUEST_ARGS: '{"_request_timeout": [60,60]}'
+  AIRFLOW__KUBERNETES__WORKER_CONTAINER_REPOSITORY: puckel/docker-airflow
+  AIRFLOW__KUBERNETES__WORKER_CONTAINER_TAG: "1.10.9"
+  AIRFLOW__KUBERNETES__DAGS_VOLUME_HOST: /mnt/airflow/dags
+  AIRFLOW__KUBERNETES__LOGS_VOLUME_CLAIM: airflow-logs-pvc
+  AIRFLOW__KUBERNETES__ENV_FROM_CONFIGMAP_REF: airflow-envvars-configmap
diff --git a/airflow-rbac.yaml b/airflow-rbac.yaml
new file mode 100644
index 0000000..7c9fcca
--- /dev/null
+++ b/airflow-rbac.yaml
@@ -0,0 +1,23 @@
+kind: ClusterRole
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  name: pods-permissions
+rules:
+  - apiGroups: [""]
+    resources: ["pods"]
+    verbs: ["get", "list", "watch", "create", "delete"]
+
+---
+
+kind: ClusterRoleBinding
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  name: pods-permissions
+subjects:
+  - kind: ServiceAccount
+    name: default
+    namespace: default
+roleRef:
+  kind: ClusterRole
+  name: pods-permissions
+  apiGroup: rbac.authorization.k8s.io
diff --git a/airflow-scheduler-deployment.yaml b/airflow-scheduler-deployment.yaml
new file mode 100644
index 0000000..70ac706
--- /dev/null
+++ b/airflow-scheduler-deployment.yaml
@@ -0,0 +1,50 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: airflow-scheduler
+  labels:
+    app: airflow-k8s
+
+spec:
+  selector:
+    matchLabels:
+      app: airflow-scheduler
+
+  replicas: 1
+
+  template:
+    metadata:
+      labels:
+        app: airflow-scheduler
+
+    spec:
+      containers:
+        - name: airflow-scheduler
+          image: puckel/docker-airflow:1.10.9
+          args: ["scheduler"]
+          envFrom:
+            - configMapRef:
+                name: airflow-envvars-configmap
+          resources:
+            limits:
+              memory: "512Mi"
+              # cpu: "100"
+          volumeMounts:
+            - name: requirements-configmap
+              subPath: "requirements.txt"
+              mountPath: "/requirements.txt"
+            - name: dags-host-volume
+              mountPath: /usr/local/airflow/dags
+            - name: logs-persistent-storage
+              mountPath: /usr/local/airflow/logs
+      volumes:
+        - name: requirements-configmap
+          configMap:
+            name: requirements-configmap
+        - name: dags-host-volume
+          hostPath:
+            path: /mnt/airflow/dags
+            type: Directory
+        - name: logs-persistent-storage
+          persistentVolumeClaim:
+            claimName: airflow-logs-pvc
diff --git a/airflow-webserver-deployment.yaml b/airflow-webserver-deployment.yaml
new file mode 100644
index 0000000..247b380
--- /dev/null
+++ b/airflow-webserver-deployment.yaml
@@ -0,0 +1,51 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: airflow-webserver
+  labels:
+    app: airflow-k8s
+
+spec:
+  selector:
+    matchLabels:
+      app: airflow-webserver
+
+  replicas: 1
+
+  template:
+    metadata:
+      labels:
+        app: airflow-webserver
+
+    spec:
+      containers:
+        - name: airflow-webserver
+          image: puckel/docker-airflow:1.10.9
+          envFrom:
+            - configMapRef:
+                name: airflow-envvars-configmap
+          resources:
+            limits:
+              memory: "2Gi"
+              # cpu: "100"
+          ports:
+            - containerPort: 8080
+          volumeMounts:
+            - name: requirements-configmap
+              subPath: "requirements.txt"
+              mountPath: "/requirements.txt"
+            - name: dags-host-volume
+              mountPath: /usr/local/airflow/dags
+            - name: logs-persistent-storage
+              mountPath: /usr/local/airflow/logs
+      volumes:
+        - name: requirements-configmap
+          configMap:
+            name: requirements-configmap
+        - name: dags-host-volume
+          hostPath:
+            path: /mnt/airflow/dags
+            type: Directory
+        - name: logs-persistent-storage
+          persistentVolumeClaim:
+            claimName: airflow-logs-pvc
diff --git a/airflow-webserver-service.yaml b/airflow-webserver-service.yaml
new file mode 100644
index 0000000..0eae25b
--- /dev/null
+++ b/airflow-webserver-service.yaml
@@ -0,0 +1,15 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: airflow-webserver
+  labels:
+    app: airflow-k8s
+
+spec:
+  type: NodePort
+
+  selector:
+    app: airflow-webserver
+
+  ports:
+    - port: 8080
diff --git a/dags/tuto.py b/dags/tuto.py
new file mode 100644
index 0000000..cead2b6
--- /dev/null
+++ b/dags/tuto.py
@@ -0,0 +1,56 @@
+"""
+Code that goes along with the Airflow tutorial located at:
+http://airflow.readthedocs.org/en/latest/tutorial.html
+"""
+from airflow import DAG
+from airflow.operators.bash_operator import BashOperator
+from datetime import datetime, timedelta
+
+
+default_args = {
+    "owner": "airflow",
+    "depends_on_past": False,
+    "start_date": datetime(2015, 6, 1),
+    "email": ["airflow@airflow.com"],
+    "email_on_failure": False,
+    "email_on_retry": False,
+    "retries": 1,
+    "retry_delay": timedelta(minutes=5),
+    # 'queue': 'bash_queue',
+    # 'pool': 'backfill',
+    # 'priority_weight': 10,
+    # 'end_date': datetime(2016, 1, 1),
+}
+
+# catchup=False: start_date lies years in the past, so with the default
+# catchup behaviour the scheduler would backfill one run per day since 2015,
+# and every task of every run becomes a pod under the KubernetesExecutor.
+dag = DAG(
+    "tutorial",
+    default_args=default_args,
+    schedule_interval=timedelta(1),
+    catchup=False,
+)
+
+# t1, t2 and t3 are examples of tasks created by instantiating operators
+t1 = BashOperator(task_id="print_date", bash_command="date", dag=dag)
+
+t2 = BashOperator(task_id="sleep", bash_command="sleep 5", retries=3, dag=dag)
+
+templated_command = """
+    {% for i in range(5) %}
+        echo "{{ ds }}"
+        echo "{{ macros.ds_add(ds, 7)}}"
+        echo "{{ params.my_param }}"
+    {% endfor %}
+"""
+
+t3 = BashOperator(
+    task_id="templated",
+    bash_command=templated_command,
+    params={"my_param": "Parameter I passed in"},
+    dag=dag,
+)
+
+t2.set_upstream(t1)
+t3.set_upstream(t1)
diff --git a/logs-persistenvolumeclaim.yaml b/logs-persistenvolumeclaim.yaml
new file mode 100644
index 0000000..40e8315
--- /dev/null
+++ b/logs-persistenvolumeclaim.yaml
@@ -0,0 +1,12 @@
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: airflow-logs-pvc
+  labels:
+    app: airflow-k8s
+spec:
+  accessModes:
+    - ReadWriteMany
+  resources:
+    requests:
+      storage: 512Mi
diff --git a/postgres-deployment.yaml b/postgres-deployment.yaml
new file mode 100644
index 0000000..0389973
--- /dev/null
+++ b/postgres-deployment.yaml
@@ -0,0 +1,33 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: postgres
+spec:
+  selector:
+    matchLabels:
+      app: postgres
+
+  replicas: 1
+
+  template:
+    metadata:
+      labels:
+        app: postgres
+
+    spec:
+      containers:
+        - name: postgres
+          image: postgres:12
+          resources:
+            limits:
+              memory: 128Mi
+              cpu: 500m
+          ports:
+            - containerPort: 5432
+          env:
+            - name: POSTGRES_PASSWORD
+              value: airflow
+            - name: POSTGRES_USER
+              value: airflow
+            - name: POSTGRES_DB
+              value: airflow
diff --git a/postgres-service.yaml b/postgres-service.yaml
new file mode 100644
index 0000000..bed2111
--- /dev/null
+++ b/postgres-service.yaml
@@ -0,0 +1,11 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: postgres
+spec:
+  selector:
+    app: postgres
+
+  ports:
+    - port: 5432
+      targetPort: 5432
diff --git a/requirements-configmap.yaml b/requirements-configmap.yaml
new file mode 100644
index 0000000..68b1755
--- /dev/null
+++ b/requirements-configmap.yaml
@@ -0,0 +1,7 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: requirements-configmap
+data:
+  requirements.txt: |
+    apache-airflow[kubernetes]==1.10.9
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..d8f6e89
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1 @@
+apache-airflow[kubernetes]==1.10.9
diff --git a/script-apply.sh b/script-apply.sh
new file mode 100755
index 0000000..cd042d2
--- /dev/null
+++ b/script-apply.sh
@@ -0,0 +1,17 @@
+#!/usr/bin/env bash
+# Create every resource of the Airflow-on-Kubernetes stack.
+# Fail fast so later manifests are never applied on top of a failed step.
+set -euo pipefail
+
+# Manifests are referenced by relative path, so run from this script's directory.
+cd "$(dirname "${BASH_SOURCE[0]}")"
+
+kubectl apply -f logs-persistenvolumeclaim.yaml
+kubectl apply -f airflow-rbac.yaml
+kubectl apply -f postgres-service.yaml
+kubectl apply -f postgres-deployment.yaml
+kubectl apply -f requirements-configmap.yaml
+kubectl apply -f airflow-envvars-configmap.yaml
+kubectl apply -f airflow-webserver-service.yaml
+kubectl apply -f airflow-webserver-deployment.yaml
+kubectl apply -f airflow-scheduler-deployment.yaml
diff --git a/script-delete.sh b/script-delete.sh
new file mode 100755
index 0000000..438ef1a
--- /dev/null
+++ b/script-delete.sh
@@ -0,0 +1,17 @@
+#!/usr/bin/env bash
+# Tear down everything created by script-apply.sh.
+# Deliberately no `set -e`: teardown is best-effort, so one failed delete must
+# not leave the remaining resources behind.
+
+# Manifests are referenced by relative path, so run from this script's directory.
+cd "$(dirname "${BASH_SOURCE[0]}")" || exit 1
+
+kubectl delete --ignore-not-found -f airflow-rbac.yaml
+kubectl delete --ignore-not-found -f postgres-service.yaml
+kubectl delete --ignore-not-found -f postgres-deployment.yaml
+kubectl delete --ignore-not-found -f requirements-configmap.yaml
+kubectl delete --ignore-not-found -f airflow-envvars-configmap.yaml
+kubectl delete --ignore-not-found -f airflow-webserver-service.yaml
+kubectl delete --ignore-not-found -f airflow-webserver-deployment.yaml
+kubectl delete --ignore-not-found -f airflow-scheduler-deployment.yaml
+kubectl delete --ignore-not-found -f logs-persistenvolumeclaim.yaml