forked from WordPress/openverse
-
Notifications
You must be signed in to change notification settings - Fork 0
/
justfile
187 lines (153 loc) · 5.76 KB
/
justfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
# Do not load a `.env` file automatically when recipes run
set dotenv-load := false
# ANSI escape sequences (blue / reset) used by `_default` to colour its heading
COLOR := "\\033[0;34m"
NO_COLOR := "\\033[0m"
# Print a coloured "Catalog" heading, then list every recipe in declaration order
@_default:
    printf "\n{{ COLOR }}# Catalog (path: \`catalog/\`)\n============================{{ NO_COLOR }}\n"
    just --list --unsorted
# Service name handed to `../exec` and `../run` by the recipes below;
# taken from the `SERVICE` environment variable, falling back to `scheduler`
SERVICE := env_var_or_default("SERVICE", "scheduler")
###########
# Version #
###########
# Both values are exported, so recipes can read them as environment variables.
# Python version: second `=`-separated field of the `# PYTHON` line in requirements-prod.txt
export CATALOG_PY_VERSION := `grep '# PYTHON' requirements-prod.txt | awk -F= '{print $2}'`
# Airflow version: third `=`-separated field of the `apache-airflow` line
# (presumably the part after `==` in the pin - confirm against requirements-prod.txt)
export CATALOG_AIRFLOW_VERSION := `grep '^apache-airflow' requirements-prod.txt | awk -F= '{print $3}'`
# Print the required Python version
@py-version:
    echo "$CATALOG_PY_VERSION"
# Print the current Airflow version
@airflow-version:
    echo "$CATALOG_AIRFLOW_VERSION"
###########
# Install #
###########
# Create the `.venv/` virtual environment with the interpreter matching the project Python version
venv:
    # Invokes `python<version>`, like `python3.10`
    bash -c "python${CATALOG_PY_VERSION} -m venv .venv/"
# Fail (exit 1) unless the active `python` has the required major.minor version
check-py-version:
    #!/usr/bin/env python
    # Compares this interpreter's major.minor against $CATALOG_PY_VERSION.
    import os
    import sys

    major, minor = sys.version_info[:2]
    current = f"{major}.{minor}"
    required = os.environ.get("CATALOG_PY_VERSION")

    # Matching versions: nothing to report.
    if current == required:
        sys.exit(0)

    print(f"Detected Python version {current} but required {required}", file=sys.stderr)
    sys.exit(1)
# NOTE(review): `args` is accepted but never forwarded to pip - confirm this is intentional
# Install the development dependencies after verifying the Python version
install *args: check-py-version
    python -m pip install --requirement requirements-dev.txt
######
# Up #
######
# Start the services of the `catalog` profile; extra flags are passed through to `../up`
[positional-arguments]
up *flags:
    COMPOSE_PROFILES="catalog" just ../up "$@"
# Start the services of the `catalog_dependencies` profile (everything except Airflow)
[positional-arguments]
up-deps *flags:
    COMPOSE_PROFILES="catalog_dependencies" just ../up "$@"
# Bring the catalog services up, then load sample data into the upstream DB
init: up
    cd .. && ./load_sample_data.sh -c
# Tear everything down, rebuild and recreate the catalog profile services, then reload sample data
recreate:
    just ../down -v
    just up --force-recreate --build
    just init
##################
# Administration #
##################
# Open `/bin/bash` in the existing `SERVICE` container, with `DC_USER` set to `airflow`
shell:
    DC_USER="airflow" just ../exec {{ SERVICE }} /bin/bash
# Run a single command (e.g. an Airflow CLI call) in the `SERVICE` container via `../exec`, then exit
[positional-arguments]
cli *args:
    DC_USER="airflow" just ../exec {{ SERVICE }} "$@"
# The remaining arguments are joined with `$*` so that `bash -c` receives them as
# ONE command string; `"... $@"` would split them into separate words and only
# the first would be part of the command.
# NOTE(review): the `:2` offset skips the first argument given to this recipe -
# confirm whether that is intentional. `${*:2}` also requires a shell that
# supports substring expansion of positional parameters (bash does).
# Launch an IPython shell in a new container under `SERVICE`
[positional-arguments]
ipython *args: up-deps
    env DC_USER="airflow" just ../run \
        --workdir /opt/airflow/catalog/dags \
        {{ SERVICE }} \
        bash -c "ipython ${*:2}"
# Parameters: `db_user_pass` (default `deploy`) and `db_name` (default `openledger`)
# are forwarded to `../_pgcli` between two `upstream_db` arguments.
# Bring the catalog services up, then open a `pgcli` shell against the PostgreSQL container
pgcli db_user_pass="deploy" db_name="openledger": up
    just ../_pgcli upstream_db {{ db_user_pass }} {{ db_name }} upstream_db
#########
# Tests #
#########
# Sets AIRFLOW_VAR_INGESTION_LIMIT, uses /opt/airflow/catalog as the working
# directory and mounts the repository's `docker/` directory at /opt/airflow/docker/.
# The volume argument is quoted so a checkout path containing spaces stays one word.
# Run a command in a test container under `SERVICE`
[positional-arguments]
_mount-test *command: up-deps
    env DC_USER="airflow" just ../run \
        -e AIRFLOW_VAR_INGESTION_LIMIT=1000000 \
        -w /opt/airflow/catalog \
        --volume "{{ justfile_directory() }}/../docker:/opt/airflow/docker/" \
        {{ SERVICE }} \
        "$@"
# Inside the session, run pytest with `--pdb` to work around xdist breaking pdb.set_trace()
# Launch a Bash shell in a test container under `SERVICE`
test-session:
    just _mount-test bash
# All arguments are joined with `$*` into ONE command string for `bash -c`;
# `"pytest $@"` would split into several words when more than one argument is
# given, so pytest would only receive the first.
# Run pytest in a test container under `SERVICE`
[positional-arguments]
test *args:
    just _mount-test bash -c "pytest $*"
#############
# Utilities #
#############
# Parameters:
#   doc          - which page to build: `dag` (default) or `media-props`
#   fail_on_diff - when exactly `true`, exit 1 if the regenerated file differs
#                  from what git has for it (default `false`)
# Generate the documentation (either "dag" or "media-props")
generate-docs doc="dag" fail_on_diff="false":
    #! /usr/bin/env bash
    set -e

    # Select the generator script, its output file and the final destination.
    if [ "{{ doc }}" == "dag" ]; then
        SCRIPT_PATH="catalog/utilities/dag_doc_gen/dag_doc_generation.py"
        GENERATED_REL_PATH="utilities/dag_doc_gen/DAGs.md"
        FINAL_FILE="documentation/catalog/reference/DAGs.md"
        NAME="DAG"
    elif [ "{{ doc }}" == "media-props" ]; then
        SCRIPT_PATH="catalog/utilities/media_props_gen/generate_media_properties.py"
        GENERATED_REL_PATH="utilities/media_props_gen/media_properties.md"
        FINAL_FILE="documentation/meta/media_properties/catalog.md"
        NAME="Media properties"
    else
        echo "Invalid documentation type specified, use \`dag\` or \`media-props\`. Exiting."
        exit 1
    fi
    GENERATED_ABS_PATH="/opt/airflow/catalog/${GENERATED_REL_PATH}"

    # Run the generator inside a `SERVICE` container, then set the output file's mode to 666.
    # The volume argument is quoted so a checkout path containing spaces stays one word.
    just ../run \
        --volume "{{ justfile_directory() }}/../docker:/opt/airflow/docker/" \
        -e PYTHONPATH=/opt/airflow/catalog:/opt/airflow/catalog/dags \
        {{ SERVICE }} \
        bash -c "python $SCRIPT_PATH && chmod 666 $GENERATED_ABS_PATH"

    # Stage the generated file under a temporary name for linting.
    TEMP="documentation/meta/temp"
    mv ../catalog/"$GENERATED_REL_PATH" ../"$TEMP".md
    echo "Moved the generated file to ../$TEMP.md"

    echo -n "Running linting..."
    # Linting step afterwards is necessary since the generated output differs greatly from what prettier expects
    # Lint output is discarded and a non-zero exit is deliberately ignored.
    just ../lint prettier "$TEMP".md &>/dev/null || true
    echo "Linting done!"

    echo -n "Replacing linted md <hr> '---' with '----' required by sphinx..."
    # Write to a side file and move it back instead of editing in place.
    sed 's/^---$/----/' "../$TEMP.md" > "../$TEMP.tmp"
    mv "../$TEMP.tmp" "../$TEMP.md"
    echo "Replacements done!"

    mv "../$TEMP.md" "../$FINAL_FILE"
    echo "Moved the generated file to ../$FINAL_FILE"

    # Compare against the literal `true` rather than executing the argument as a command.
    if [ "{{ fail_on_diff }}" = "true" ]; then
        # Testing the command inside `if !` keeps `set -e` from aborting before the message prints.
        if ! git diff --exit-code -- "../$FINAL_FILE"; then
            # The trailing \e[0m resets the red colour so it does not leak into later terminal output.
            printf "\n\n\e[31m!! Changes found in $NAME documentation, please run 'just catalog/generate-docs {{ doc }}' locally and commit difference !!\e[0m\n\n"
            exit 1
        fi
    fi
# Parameters: `provider_name` and `endpoint` are passed as quoted arguments;
# `media_types` takes one or more values (default `image`) and is passed after `-m`.
# Generate the files for a new provider from the templates
add-provider provider_name endpoint +media_types="image":
    python3 templates/create_provider_ingester.py "{{ provider_name }}" "{{ endpoint }}" -m {{ media_types }}
# Start `bash` through `../run` for the service named by the SERVICE env-var
run:
    just ../run {{ SERVICE }} bash