-
Notifications
You must be signed in to change notification settings - Fork 0
/
Makefile
134 lines (99 loc) · 3.77 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# ----------------------------------
# INSTALL & TEST
# ----------------------------------
# Install the Python dependencies listed in requirements.txt, using whichever
# `pip` is first on PATH.
# Phony: this is a command, not a file; a stray file with the same name must
# not make the target look up to date.
.PHONY: install_requirements
install_requirements:
	@pip install -r requirements.txt
# Lint everything under scripts/ and the *.py files of the TaxiFareModel
# package with flake8.
# Phony: this is a command, not a file to be built.
.PHONY: check_code
check_code:
	@flake8 scripts/* TaxiFareModel/*.py
# Reformat everything under scripts/ and the *.py files of the TaxiFareModel
# package in place with the `black` formatter.
# Phony: the target shares its name with the tool it runs and never produces
# a file called `black`.
.PHONY: black
black:
	@black scripts/* TaxiFareModel/*.py
# Run the pytest files in tests/ under coverage, then print a coverage report
# that lists missing lines (-m).
# Files matching "${VIRTUAL_ENV}/lib/python*" are omitted from the report.
# VIRTUAL_ENV is expected to come from the environment; if it is unset the
# pattern degrades to "/lib/python*".
# Phony: this is a command, not a file to be built.
.PHONY: test
test:
	@coverage run -m pytest tests/*.py
	@coverage report -m --omit="${VIRTUAL_ENV}/lib/python*"
# Functional tests: not implemented yet.
# The recipe used to be the placeholder text "Write me", which the shell tried
# to execute as a command. Fail explicitly with a readable message instead, so
# the target still exits non-zero but says why.
.PHONY: ftest
ftest:
	@echo "ftest: functional tests are not implemented yet" >&2; exit 1
# Remove generated artefacts: version stamps, the coverage data file, Python
# bytecode caches (top level and one directory deep) and packaging output.
# The globs *.dist-info / *.egg-info already cover the TaxiFareModel-specific
# names, so each path is listed exactly once.
# Phony: must run even if a file named `clean` exists.
.PHONY: clean
clean:
	@rm -f */version.txt .coverage
	@rm -fr __pycache__ */__pycache__ */*.pyc
	@rm -fr build dist *.dist-info *.egg-info
# Install the package found in the current directory with pip, upgrading an
# already-installed copy (-U).
# Phony: this is a command, not a file to be built.
.PHONY: install
install:
	@pip install . -U
# Full local pipeline: clean, install, test, format, lint.
# NOTE: the prerequisites are written in the intended execution order. make
# only honours that order in a serial run, so do not invoke this target with
# `-j`.
# Phony: `all` is an aggregate command, not a file.
.PHONY: all
all: clean install test black check_code
# awk program shared by every count_lines pipeline: reprint each `wc -l`
# record as "<count> <path>", add the count field to a running sum, and print
# the sum after the last record. `$$` is make's escape for the literal `$`
# that awk needs; the variable is recursive so the escape is resolved when the
# recipe is expanded.
COUNT_LINES_AWK = '{printf "%4s %s\n", $$1, $$2}{s+=$$1}END{print s}'

# Print per-file line counts in ascending order, followed by their total, for
# three groups: every *.py under ./, every entry under ./scripts whose name
# contains '-', and every *.py under ./tests. An empty line follows each group.
# Phony: this is a reporting command, not a file to be built.
.PHONY: count_lines
count_lines:
	@find ./ -name '*.py' -exec wc -l {} \; | sort -n | awk $(COUNT_LINES_AWK)
	@echo ''
	@find ./scripts -name '*-*' -exec wc -l {} \; | sort -n | awk $(COUNT_LINES_AWK)
	@echo ''
	@find ./tests -name '*.py' -exec wc -l {} \; | sort -n | awk $(COUNT_LINES_AWK)
	@echo ''
# ----------------------------------
# UPLOAD PACKAGE TO PYPI
# ----------------------------------
# Account name handed to `twine upload -u` by the PyPI upload targets.
# NOTE(review): the value looks like a placeholder - confirm and replace it
# with a real user name. It contains `<` and `>`, which a shell treats as
# redirections, so quote it wherever it is used in a recipe.
PYPI_USERNAME := <AUTHOR>
# Build a source distribution and a wheel with setup.py.
# Phony is essential here: the clean target removes a `build` directory, so
# one is expected to exist after a build. Without this declaration make would
# treat that directory as the finished target and answer "'build' is up to
# date" instead of rebuilding.
.PHONY: build
build:
	@python setup.py sdist bdist_wheel
# Upload everything in dist/ to the `testpypi` repository with twine.
# The user name is quoted so that shell metacharacters in PYPI_USERNAME
# (such as `<` or `>`) are passed to twine literally instead of being
# interpreted as redirections.
# Phony: this is a command, not a file to be built.
.PHONY: pypi_test
pypi_test:
	@twine upload -r testpypi dist/* -u "$(PYPI_USERNAME)"
# Upload everything in dist/ to twine's default repository.
# The user name is quoted so that shell metacharacters in PYPI_USERNAME
# (such as `<` or `>`) are passed to twine literally instead of being
# interpreted as redirections.
# Phony: this is a command, not a file to be built.
.PHONY: pypi
pypi:
	@twine upload dist/* -u "$(PYPI_USERNAME)"
# ----------------------------------
# CREATE A GCP BUCKET
# ----------------------------------
# GCP project the gcloud/gsutil commands act on - replace with your own project id
PROJECT_ID := wagon-bootcamp-346220
# Cloud Storage bucket used by the gsutil commands - replace with your own bucket name
BUCKET_NAME := wagon-data-847-cameronpesant
# Bucket/job region; pick one from https://cloud.google.com/storage/docs/locations#available_locations
REGION := northamerica-northeast1
# Make PROJECT_ID the active project of the local gcloud configuration.
# Phony: this is a command, not a file to be built.
.PHONY: set_project
set_project:
	@gcloud config set project $(PROJECT_ID)
# Create the bucket gs://$(BUCKET_NAME) in REGION (-l) under PROJECT_ID (-p).
# Phony: this is a command, not a file to be built.
.PHONY: create_bucket
create_bucket:
	@gsutil mb -l $(REGION) -p $(PROJECT_ID) gs://$(BUCKET_NAME)
# ----------------------------------
# UPLOAD DATASET TO GCP
# ----------------------------------
# File to upload to GCP. Give an absolute path, or one relative to the
# directory in which `make` is run. Replace it with your local path to
# `train_1k.csv` and keep the double quotes: they are part of the value, so
# the path reaches the shell already quoted.
LOCAL_PATH := "/home/sarah/code/scameronp/TaxiFareModel/raw_data/train_1k.csv"
# Folder inside the bucket that receives the upload (`data` is an arbitrary choice)
BUCKET_FOLDER := data
# Object name inside the bucket: the base name of LOCAL_PATH, i.e. the file is
# not renamed. Kept recursive so `basename` only runs when the value is used.
BUCKET_FILE_NAME = $(shell basename $(LOCAL_PATH))
# Copy LOCAL_PATH to gs://<BUCKET_NAME>/<BUCKET_FOLDER>/<BUCKET_FILE_NAME>.
# Written out with literal values, the command has this shape:
#   gsutil cp train_1k.csv gs://wagon-ml-my-bucket-name/data/train_1k.csv
# LOCAL_PATH is deliberately not re-quoted here: its value already carries
# its own double quotes.
# Phony: this is a command, not a file to be built.
.PHONY: upload_data
upload_data:
	@gsutil cp ${LOCAL_PATH} gs://${BUCKET_NAME}/${BUCKET_FOLDER}/${BUCKET_FILE_NAME}
# ----------------------------------
# TRAIN ON AI PLATFORM
# ----------------------------------
##### Training  - - - - - - - - - - - - - - - - - - - - - -
# Bucket folder used as the training job directory; it stores the packages
# uploaded to GCP for the training. The single quotes are part of the value.
BUCKET_TRAINING_FOLDER := 'trainings'

### GCP AI Platform - - - - - - - - - - - - - - - - - - - -

##### Machine configuration - - - - - - - - - - - - - - - -
# Python and runtime versions requested for the training job.
PYTHON_VERSION := 3.7
# NOTE(review): FRAMEWORK is not referenced by any rule visible in this file.
FRAMEWORK := scikit-learn
RUNTIME_VERSION := 1.15

##### Package params  - - - - - - - - - - - - - - - - - - -
# The module that gets run is $(PACKAGE_NAME).$(FILENAME).
PACKAGE_NAME := TaxiFareModel
FILENAME := trainer

##### Job - - - - - - - - - - - - - - - - - - - - - - - - -
# Job name with a YYYYmmdd_HHMMSS timestamp suffix. Recursive on purpose:
# `date` runs when the variable is expanded, not when the file is parsed.
JOB_NAME = taxi_fare_training_pipeline_$(shell date +'%Y%m%d_%H%M%S')
# Run the training module $(PACKAGE_NAME).$(FILENAME) with the local Python
# interpreter.
# Phony: this is a command, not a file to be built.
.PHONY: run_locally
run_locally:
	@python -m $(PACKAGE_NAME).$(FILENAME)
# Submit the package as a training job to GCP AI Platform and stream its logs.
#   --job-dir       gs://<BUCKET_NAME>/<BUCKET_TRAINING_FOLDER>
#   --package-path  local package directory to upload
#   --module-name   module to run, <PACKAGE_NAME>.<FILENAME>
# The command is intentionally echoed (no leading `@`) so the submitted job
# name and flags are visible in the output.
# Phony: this is a command, not a file to be built.
.PHONY: gcp_submit_training
gcp_submit_training:
	gcloud ai-platform jobs submit training $(JOB_NAME) \
		--job-dir gs://$(BUCKET_NAME)/$(BUCKET_TRAINING_FOLDER) \
		--package-path $(PACKAGE_NAME) \
		--module-name $(PACKAGE_NAME).$(FILENAME) \
		--python-version=$(PYTHON_VERSION) \
		--runtime-version=$(RUNTIME_VERSION) \
		--region $(REGION) \
		--stream-logs