-
-
Notifications
You must be signed in to change notification settings - Fork 23
/
Makefile
147 lines (117 loc) · 4.75 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
#
# Makefile
#
# Targets that are commands rather than files. SRC below lists etl, docs and
# api as source directories, so those names in particular must be phony or
# make would consider them already up to date.
.PHONY: etl docs full lab test-default publish grapher dot watch clean clobber deploy api activate
# Remaining command targets that have a recipe in this file.
# `test` is deliberately left out: it has no recipe here, and marking it phony
# would switch off implicit-rule search for it, which default.mk may rely on
# (NOTE(review): confirm against default.mk before adding it).
.PHONY: help watch-all test-all format-all unittest test-integration coverage
.PHONY: reindex prune sync.catalog version-tracker fasttrack wizard

# Shared rules. NOTE(review): presumably provides .venv and the check-*/format
# targets referenced below — confirm.
include default.mk

# Source directories. Not referenced in this file; presumably consumed by
# default.mk — confirm.
SRC = etl snapshots apps api tests docs

# Value of sys.platform as reported by the `python` found on PATH. Recursive
# assignment, so the shell command runs on each expansion, not at parse time.
PYTHON_PLATFORM = $(shell python -c "import sys; print(sys.platform)")

# Glob of library sub-projects. Make does not expand it here; the shell expands
# it inside the for-loops of the *-all targets.
LIBS = lib/*
# Print a summary of the available commands.
# Every target with a recipe in this file is listed, so keep this in sync when
# adding or removing targets.
help:
	@echo 'Available commands:'
	@echo
	@echo ' make clean Delete all non-reference data in the data/ folder'
	@echo ' make clobber Delete non-reference data and .venv'
	@echo ' make deploy Re-run the full ETL on production'
	@echo ' make docs Serve documentation locally'
	@echo ' make dot Build a visual graph of the dependencies'
	@echo ' make etl Fetch data and run all transformations for garden'
	@echo ' make format Format code'
	@echo ' make format-all Format code (including modules in lib/)'
	@echo ' make full Fetch all data and run full transformations'
	@echo ' make grapher Publish supported datasets to Grapher'
	@echo ' make sync.catalog Sync catalog from R2 into local data/ folder'
	@echo ' make lab Start a Jupyter Lab server'
	@echo ' make publish Publish the generated catalog to S3'
	@echo ' make reindex Create a catalog index'
	@echo ' make prune Prune datasets with no recipe from catalog'
	@echo ' make api Start the ETL API on port 8081'
	@echo ' make fasttrack Start Fast-track on port 8082'
	@echo ' make chart-sync Start Chart-sync on port 8083'
	@echo ' make wizard Start Wizard on port 8053'
	@echo ' make test Run all linting and unit tests'
	@echo ' make test-all Run all linting and unit tests (including for modules in lib/)'
	@echo ' make unittest Run unit tests only'
	@echo ' make test-integration Run integration tests'
	@echo ' make coverage Run unit tests with coverage'
	@echo ' make watch Run all tests, watching for changes'
	@echo ' make watch-all Run all tests, watching for changes (including for modules in lib/)'
	@echo
# Serve the documentation locally using the mkdocs installed in the virtualenv.
docs: .venv
	.venv/bin/mkdocs serve
# Re-run the unit tests of this project and of every library matched by LIBS
# whenever a file under the current directory changes.
# Depends on .venv because the recipe runs .venv/bin/watchmedo (same as `watch`).
# The inner command uses a literal `make` on purpose: writing the MAKE variable
# on this line would mark it as a recursive invocation, and make would then
# start the watcher even under `make -n`.
watch-all: .venv
	.venv/bin/watchmedo shell-command -c 'clear; make unittest; for lib in $(LIBS); do (cd $$lib && make unittest); done' --recursive --drop .
# Run the `test` target for this project, then for every library matched by LIBS.
# Sub-makes are started through $(MAKE) so that flags such as -n and the -j
# jobserver are propagated. The loop exits on the first failing library;
# without that, only the status of the last library would decide the result.
test-all:
	@echo '================ etl ================='
	@$(MAKE) test
	@for lib in $(LIBS); do \
		echo "================ $$lib ================="; \
		$(MAKE) -C "$$lib" test || exit 1; \
	done
# Format this project, then every library matched by LIBS.
# The top-level step runs `format` (it previously ran `test`, which formats
# nothing). NOTE(review): `format` is not defined in this file; presumably it
# comes from default.mk — confirm.
# Sub-makes go through $(MAKE), and the loop stops at the first library whose
# `format` fails instead of reporting only the last library's status.
format-all:
	@echo '================ etl ================='
	@$(MAKE) format
	@for lib in $(LIBS); do \
		echo "================ $$lib ================="; \
		$(MAKE) -C "$$lib" format || exit 1; \
	done
# On every file change under the current directory, clear the screen and run
# the formatting, lint, typing and coverage targets.
# NOTE(review): this runs `lint` while `test` depends on `check-linting`;
# neither is defined in this file — confirm both exist in default.mk.
watch: .venv
	.venv/bin/watchmedo shell-command \
		-c 'clear; make check-formatting lint check-typing coverage' \
		--recursive --drop .
# Run pytest on tests/, deselecting everything marked `integration`.
unittest: .venv
	@echo '==> Running unit tests'
	.venv/bin/pytest -m "not integration" tests
# Aggregate target: formatting, linting and typing checks, unit tests, and the
# version tracker. It has no recipe of its own.
# NOTE(review): the check-* targets are not defined in this file; presumably
# they come from default.mk — confirm.
test: \
    check-formatting \
    check-linting \
    check-typing \
    unittest \
    version-tracker
# Run only the tests in tests/ that carry the `integration` marker.
test-integration: .venv
	@echo '==> Running integration tests'
	.venv/bin/pytest -m integration tests
# Run pytest on tests/ while measuring coverage of the etl package; the
# term-missing report lists the lines that were not executed.
# No marker filter is applied here, unlike `unittest`.
coverage: .venv
	@echo '==> Unit testing with coverage'
	.venv/bin/pytest \
		--cov=etl \
		--cov-report=term-missing \
		tests
# Run the ETL restricted to `garden`.
etl: .venv
	@echo '==> Running etl on garden'
	.venv/bin/etl run garden
# Run the ETL with no step filter.
full: .venv
	@echo '==> Running full etl'
	.venv/bin/etl run
# Remove the whole data/ directory, then restore the files git tracks under it.
# Both steps share one shell line, so the checkout only runs if the removal
# succeeded.
clean:
	@echo '==> Cleaning data/ folder'
	rm -rf data && git checkout data
# Run `clean`, then delete every .venv, .pytest_cache and .cachedir found below
# the current directory.
# `-prune` stops find from descending into a directory it is about to delete,
# and `-exec ... {} +` hands the paths straight to rm, so names containing
# whitespace are handled correctly (a `find | xargs` pipeline splits them).
clobber: clean
	find . -name .venv -prune -exec rm -rf {} +
	find . -name .pytest_cache -prune -exec rm -rf {} +
	find . -name .cachedir -prune -exec rm -rf {} +
# Launch Jupyter Lab from the virtualenv.
lab: .venv
	@echo '==> Starting Jupyter server'
	.venv/bin/jupyter lab
# Publish the catalog after the `etl` and `reindex` targets have been made.
# NOTE(review): `reindex` does not depend on `etl`, so under `make -j` the two
# may run concurrently — confirm that is acceptable.
publish: etl reindex
	@echo '==> Publishing the catalog'
	.venv/bin/etl d publish --private
# Build the catalog index via `etl d reindex`.
reindex: .venv
	@echo '==> Creating a catalog index'
	.venv/bin/etl d reindex
# Remove from the catalog the datasets that have no recipe, via `etl d prune`.
prune: .venv
	@echo '==> Prune datasets with no recipe from catalog'
	.venv/bin/etl d prune
# Syncing the catalog avoids rebuilding it locally from scratch, which could
# take a few hours. It downloads about 10 GB from the main channels (meadow,
# garden, open_numbers) and is especially useful after ETL_EPOCH is increased
# or regions are updated.
# Requires `rclone` on PATH with a remote named `owid-r2` configured.
sync.catalog: .venv
	@echo '==> Sync catalog from R2 into local data/ folder (~10gb)'
	rclone copy owid-r2:owid-catalog/ data/ \
		--verbose --fast-list \
		--transfers=64 --checkers=64 \
		--include "/meadow/**" \
		--include "/garden/**" \
		--include "/open_numbers/**"
# Run the ETL with the --grapher flag (upserts into Grapher, per the message
# printed below).
grapher: .venv
	@echo '==> Running full etl with grapher upsert'
	.venv/bin/etl run --grapher
# `make dot` is an alias for building the dependency graph PDF.
dot: dependencies.pdf

# Render the dependency graph: write it as Graphviz source, then convert it to
# PDF with the `dot` program.
# The PDF is written to a temporary file derived from the target name and
# moved into place afterwards, so an interrupted or failed `dot` run never
# leaves a truncated dependencies.pdf that looks up to date.
# dependencies.dot is produced as a by-product and is not tracked as a target.
dependencies.pdf: .venv dag/main.yml etl/to_graphviz.py
	.venv/bin/etl graphviz dependencies.dot
	dot -Tpdf dependencies.dot >$@.tmp
	mv -f $@.tmp $@
# Rebuild the production ETL by running etl-prod.sh on a remote machine over
# ssh; -t forces a terminal to be allocated for the remote command.
# NOTE(review): the ssh destination below reads "[email protected]", which looks
# like a redacted or obfuscated user@host — restore the real value before
# relying on this target.
deploy:
	@echo '==> Rebuilding the production ETL from origin/master'
	ssh -t [email protected] /home/owid/analytics/ops/scripts/etl-prod.sh
# Consistency check on dataset versions, run through `etl d version-tracker`;
# the message printed below states what is verified.
version-tracker: .venv
	@echo '==> Check that no archive dataset is used by an active dataset, and that all active datasets are used'
	.venv/bin/etl d version-tracker
# Start the ETL API (api.main:app) under uvicorn on port 8081 with auto-reload.
# --host 0.0.0.0 makes the server listen on all interfaces, not just the
# localhost address shown in the message.
api: .venv
	@echo '==> Starting ETL API on http://localhost:8081/api/v1/indicators'
	.venv/bin/uvicorn api.main:app \
		--reload \
		--port 8081 \
		--host 0.0.0.0
# Start Fast-track on port 8082; --skip-auto-open is passed so that, going by
# the flag name, no browser window is opened automatically.
fasttrack: .venv
	@echo '==> Starting Fast-track on http://localhost:8082/'
	.venv/bin/fasttrack \
		--skip-auto-open \
		--port 8082
# Start the Wizard app through the etlwiz entry point.
# NOTE(review): the port in the message (8053) is not passed on the command
# line; presumably it is etlwiz's default — confirm.
wizard: .venv
	@echo '==> Starting Wizard on http://localhost:8053/'
	.venv/bin/etlwiz