-
Notifications
You must be signed in to change notification settings - Fork 0
/
Makefile
85 lines (65 loc) · 2.46 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# Every target visible in this Makefile names a command, not a file it
# produces. Declare all of them phony so that a stray file or directory named
# e.g. `test`, `run`, `lint` or `clean` can never make the target look up to
# date and silently skip its recipe.
.PHONY: venv setup install install-deps install-dashboard \
        download-data prepare-datasets \
        test test-api test-models format lint \
        run run-optimized run-dashboard \
        train-models compare-models clean
# Location of the project-local virtual environment, plus the interpreter and
# pip executables inside it. Recipes below invoke these rather than a bare
# `python` / `pip`.
VENV := .venv
PYTHON := ${VENV}/bin/python
PIP := ${VENV}/bin/pip
# Performance-tuning environment, exported to every recipe in this Makefile.
#
# Comments are kept on their own lines on purpose: in `VAR = value # comment`
# Make keeps the whitespace in front of `#` as part of the value, so the
# inline form exported "8 " rather than "8". Values are also left unquoted,
# because Make does not strip quotes: `PYTHONWARNINGS="ignore"` exported the
# quote characters themselves.
#
# Any of these can still be overridden per invocation, e.g.
#   make train-models OMP_NUM_THREADS=4

# OpenMP threads
export OMP_NUM_THREADS := 8
# MKL threads
export MKL_NUM_THREADS := 8
# NumExpr threads
export NUMEXPR_NUM_THREADS := 8
# OpenBLAS threads
export OPENBLAS_NUM_THREADS := 8
# Enable HuggingFace tokenizer parallelism
export TOKENIZERS_PARALLELISM := true
# PyTorch threads
export TORCH_NUM_THREADS := 8
# Async CUDA operations
export CUDA_LAUNCH_BLOCKING := 0
# Reduce overhead from warnings
export PYTHONWARNINGS := ignore
# Memory allocation strategy
export PYTORCH_CUDA_ALLOC_CONF := max_split_size_mb:512
# Avoid network checks
export TRANSFORMERS_OFFLINE := 1
# Avoid dataset downloads during processing
export HF_DATASETS_OFFLINE := 1

# NOTE(review): with the stray trailing whitespace gone, the two offline
# switches above now carry exactly the value "1". `download-data` exists to
# fetch data, so they are switched back off for that target (and for the
# prerequisites it triggers). This assumes the libraries treat "0" as
# "online" - confirm. Other targets that need a first-time model/dataset
# download can be run with
#   make <target> TRANSFORMERS_OFFLINE=0 HF_DATASETS_OFFLINE=0
download-data: export TRANSFORMERS_OFFLINE := 0
download-data: export HF_DATASETS_OFFLINE := 0
# Create the virtual environment in ${VENV} using the system `python3`.
venv:
	python3 -m venv ${VENV}
# One-shot project bootstrap: runs every preparation target listed as a
# prerequisite, then downloads the spaCy English model.
# NOTE(review): `install` already runs the same `spacy download` command, so
# this final step looks redundant - confirm before removing.
setup: \
    venv \
    download-data \
    install \
    install-dashboard \
    prepare-datasets
	${PYTHON} -m spacy download en_core_web_sm
# Install the third-party Python packages into the virtual environment.
# A single pip invocation; the package list is only wrapped for readability.
install-deps: venv
	${PIP} install \
		requests tqdm datasets transformers torch numpy sentencepiece protobuf \
		nltk spacy scikit-learn pandas scipy \
		beautifulsoup4 lxml textacy
# Fetch external data once the dependencies are installed:
#   1. the NLTK `punkt` and `stopwords` resources,
#   2. whatever scripts/download_datasets.py retrieves.
download-data: install-deps
	${PYTHON} -c "import nltk; nltk.download('punkt'); nltk.download('stopwords')"
	${PYTHON} scripts/download_datasets.py
# Install this project in editable mode, then its runtime packages, then the
# spaCy English model (which needs spaCy from the previous step).
install: venv
	${PIP} install -e .
	${PIP} install \
		spacy joblib pandas datasets transformers sentence-transformers tqdm torch numpy
	${PYTHON} -m spacy download en_core_web_sm
# Run the whole test suite verbosely with coverage measured over src/.
# PYTHONPATH=. puts the repository root on the import path for this command.
test: venv
	PYTHONPATH=. ${PYTHON} -m pytest tests/ -v --cov=src
# Auto-format sources and tests in place: black first, then isort.
format: venv
	${PYTHON} -m black src/ tests/
	${PYTHON} -m isort src/ tests/
# Static checks over sources and tests: flake8, then mypy.
# Make stops at the first tool that exits non-zero.
lint: venv
	${PYTHON} -m flake8 src/ tests/
	${PYTHON} -m mypy src/ tests/
# (Re)install the dependencies via install-deps, then launch run_optimized.py
# with the default configuration file.
run-optimized: install-deps
	${PYTHON} run_optimized.py --config config/config.yaml
# Launch run_optimized.py with the default configuration file. Unlike
# run-optimized, this only ensures the venv exists and installs nothing.
run: venv
	${PYTHON} run_optimized.py --config config/config.yaml
# Remove the downloaded archive, the virtual environment and all Python /
# pytest / mypy cache directories below the repository root.
#
# The caches are collected in one traversal instead of three, and `-prune`
# stops find from descending into a directory that is about to be deleted.
# That avoids walking the cache contents and avoids "No such file or
# directory" errors if a directory is removed before find tries to enter it.
clean:
	rm -rf data/scisummnet.zip $(VENV)
	find . -type d \
		\( -name "__pycache__" -o -name ".pytest_cache" -o -name ".mypy_cache" \) \
		-prune -exec rm -rf {} +
# Dashboard commands

# Start the dashboard via scripts/run_dashboard.py.
# Depends on `venv`, like `run` and `test`, so $(PYTHON) exists before it is
# invoked. The dashboard packages themselves come from `install-dashboard`.
run-dashboard: venv
	$(PYTHON) scripts/run_dashboard.py
# Install dash, dash-bootstrap-components, plotly and requests into the
# virtual environment.
# Depends on `venv`, like `install` and `install-deps`: without it $(PIP)
# does not exist on a fresh checkout, and under `make -j setup` this recipe
# could start before the environment has been created.
install-dashboard: venv
	$(PIP) install dash dash-bootstrap-components plotly requests
# Run only tests/test_arxiv_api.py, verbosely.
# Depends on `venv`, like the full `test` target, so $(PYTHON) exists.
test-api: venv
	$(PYTHON) -m pytest tests/test_arxiv_api.py -v
# Training commands

# Run scripts/train_models.py.
# Depends on `venv`, like `run`, so $(PYTHON) exists before it is invoked.
# Packages are not installed here; use `install` / `install-deps` for that.
train-models: venv
	$(PYTHON) scripts/train_models.py
# Run scripts/compare_models.py.
# Depends on `venv`, like `run`, so $(PYTHON) exists before it is invoked.
compare-models: venv
	$(PYTHON) scripts/compare_models.py
# Run only tests/test_model_trainer.py, verbosely.
# Depends on `venv`, like the full `test` target, so $(PYTHON) exists.
test-models: venv
	$(PYTHON) -m pytest tests/test_model_trainer.py -v
# Dataset preparation

# Run scripts/prepare_datasets.py.
# Depends on `venv` so $(PYTHON) exists; this target is a prerequisite of
# `setup` and could otherwise start before the environment is created under
# `make -j`.
# NOTE(review): presumably this also needs the output of `download-data` and
# the packages from `install`; those are not added as prerequisites here
# because they would re-run on every invocation - confirm the intended chain.
prepare-datasets: venv
	$(PYTHON) scripts/prepare_datasets.py