forked from WordPress/openverse
-
Notifications
You must be signed in to change notification settings - Fork 0
/
entrypoint.sh
executable file
·134 lines (112 loc) · 4.8 KB
/
entrypoint.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#!/bin/bash
# Abort as soon as any unguarded command exits with a non-zero status, so a
# failed setup step stops the script before the final `exec`.
set -e
# Print the banner and usage overview for this entrypoint on stdout.
# The here-document delimiter is quoted, so the backticks and backslashes in
# the ASCII art are emitted literally rather than being expanded by the shell.
function help_text() {
  cat <<'HELP_TEXT'
,-. ;-. ,--. . . . , ,--. ,-. ,-. ,--.
/ \ | ) | |\ | | / | | ) ( ` |
| | |-' |- | \| | / |- |-< `-. |-
\ / | | | | |/ | | \ . ) |
`-' ' `--' ' ' ' `--' ' ' `-' `--'
Docker entrypoint script for Openverse Airflow. This uses the upstream Airflow
entrypoint under the hood. For help running commands, see
https://airflow.apache.org/docs/docker-stack/entrypoint.html#executing-commands
Unless specified, all commands will wait for the database to be ready and
will upgrade the Airflow schema.
Usage:
help - print this help text and exit
bash [...] - drop into a bash shell or run a bash command/script
python [ ... ] - drop into a python shell or run a python command/script
(anything else) - interpreted as an argument to "airflow [ argument ]"
HELP_TEXT
}
# Print a banner: the given text centered between two rows of '#' characters.
# Globals:   COLUMNS (read) - banner width; 80 is used when it is unset,
#            empty, or not a positive integer.
# Arguments: $1 - text to center
# Outputs:   three lines on stdout (divider, text, divider)
function header() {
  # Keep the working variables local so calling this function cannot clobber
  # a `size` or `divider` variable belonging to the caller.
  local size=${COLUMNS:-80}
  local divider
  # A non-numeric or zero width would yield a malformed divider, so fall back
  # to the default width instead.
  if ! [[ $size =~ ^[1-9][0-9]*$ ]]; then
    size=80
  fi
  # Build the divider once with builtins: pad an empty string to $size spaces,
  # then turn every space into '#'.
  printf -v divider '%*s' "$size" ''
  divider=${divider// /#}
  printf '%s\n' "$divider"
  # Right-align the text in a field ending half a text-length past the middle,
  # which centers it between the dividers.
  printf '%*s\n' $(((${#1} + size) / 2)) "$1"
  printf '%s\n' "$divider"
}
# When the first argument asks for help, print the usage text and stop before
# any environment changes or Airflow commands run.
case "$1" in
  help | --help) help_text && exit 0 ;;
esac
# The $COLUMNS variable takes a moment to populate
sleep 0.1
# Reformat Slack Airflow connections
header "MODIFYING ENVIRONMENT"
# Rewrite selected Airflow connection variables in place, relying on naming
# conventions: every AIRFLOW_CONN_SLACK* variable and the Sensitive Terms
# connection (AIRFLOW_CONN_SENSITIVE_TERMS*) whose value starts with "http"
# has that value URL-encoded and prefixed with "http://".
# The loop reads from process substitution, <(...), rather than from a pipe:
# a piped loop body runs in a subshell, so its `export` calls would not
# propagate to this shell.
# See: https://unix.stackexchange.com/a/402752
# NOTE(review): the old and new values are echoed to the log; if they embed
# credentials (e.g. webhook tokens), consider masking them.
while IFS= read -r var_string; do
  # Everything before the first "=" is the name, everything after it the
  # value. Parameter expansion handles names containing digits and never
  # returns a failing status, unlike `expr` on an empty match.
  var_name=${var_string%%=*}
  old_value=${var_string#*=}
  echo "Variable Name: $var_name"
  # Only http(s) values are rewritten; anything else is left untouched rather
  # than aborting the whole entrypoint under `set -e`.
  if [[ $old_value != http* ]]; then
    echo " Skipped: value does not start with http, leaving it unchanged"
    continue
  fi
  echo " Old Value: $old_value"
  # call python to url encode the http clause
  url_encoded=$(python -c "from urllib.parse import quote_plus; import sys; print(quote_plus(sys.argv[1]))" "$old_value")
  # prepend http://
  new_value="http://${url_encoded}"
  echo " New Value: $new_value"
  # set the environment variable
  export "${var_name}=${new_value}"
  # Anchor on the variable name and the "=" so that only real assignments are
  # selected, never a match in the middle of some other variable's value.
done < <(env | grep -E '^AIRFLOW_CONN_(SLACK|SENSITIVE_TERMS)[A-Za-z0-9_]*=')
# When started as the webserver, seed Airflow Variables from variables.tsv.
# Each row is expected to be "<key>\t<value>\t<description>"; a key that is
# already present in the Airflow database keeps its current value.
if [[ $* == "webserver" ]]; then
  # Wait for the database to initialize, will time out if not
  airflow db check-migrations

  # Set up Airflow Variable defaults with descriptions automatically
  header "SETTING VARIABLE DEFAULTS"

  # List all existing airflow variables, ignoring the first descriptive "key"
  output=$(airflow variables list -o plain | tail -n +2)

  # Keys already present in the database, one array element per output line
  existing_variables=()
  if [[ -z $output || $output == "No data found" ]]; then
    # Nothing to protect: every row of variables.tsv will be set
    echo "No existing variables found, proceeding to set all variables"
  else
    while IFS= read -r variable; do
      existing_variables+=("$variable")
    done <<<"$output"
    printf '%s\n\n' "Found the following existing variables (the values of these will not be overwritten):"
    printf '%s\n' "${existing_variables[@]}"
  fi

  # Now, iterate through each row of variables.tsv and only
  # run airflow variables set --description <description> <key> <value>
  # if the key doesn't already exist in the database, i.e. it is not found in
  # $existing_variables.
  # `read` returns non-zero on a final line that lacks a trailing newline even
  # though it has filled the variables; the `|| [[ -n ... ]]` guard makes sure
  # that last row is still processed instead of being silently dropped.
  while IFS=$'\t' read -r var_key var_value var_description || [[ -n $var_key ]]; do
    # skip the first meta row or a row with empty data
    if [[ $var_description == "description" || -z $var_value ]]; then
      continue
    fi
    # check if current key already exists
    matched=false
    for variable in "${existing_variables[@]}"; do
      if [[ $variable == "$var_key" ]]; then
        matched=true
        break
      fi
    done
    if ! $matched; then
      airflow variables set --description "$var_description" "$var_key" "$var_value"
    fi
  done <"variables.tsv"

  # Print the new variables list
  new_variables_list=$(airflow variables list -o plain | tail -n +2)
  printf '%s\n\n' "The following variables are now set:"
  printf '%s\n' "$new_variables_list"
fi
# Replace this shell process with /entrypoint (presumably the upstream Airflow
# entrypoint mentioned in the help text), forwarding every argument unchanged.
exec /entrypoint "$@"