#################################################################################
# GLOBALS                                                                       #
#################################################################################

PROJECT_NAME = lnp-ml
PYTHON_VERSION = 3.8
PYTHON_INTERPRETER = python

# --- CLI flag variables ---
# Each *_FLAG expands to a CLI option only when the corresponding user variable
# is set on the command line (e.g. `make train USE_MPNN=1 DEVICE=cuda`),
# otherwise it expands to nothing.
MPNN_FLAG = $(if $(USE_MPNN),--use-mpnn,)
FREEZE_FLAG = $(if $(FREEZE_BACKBONE),--freeze-backbone,)
DEVICE_FLAG = $(if $(DEVICE),--device $(DEVICE),)
SCAFFOLD_SPLIT_FLAG = $(if $(filter 1,$(SCAFFOLD_SPLIT)),--scaffold-split,)
SEED_FLAG = $(if $(SEED),--seed $(SEED),)
N_TRIALS_FLAG = $(if $(N_TRIALS),--n-trials $(N_TRIALS),)
EPOCHS_PER_TRIAL_FLAG = $(if $(EPOCHS_PER_TRIAL),--epochs-per-trial $(EPOCHS_PER_TRIAL),)
MIN_STRATUM_FLAG = $(if $(MIN_STRATUM_COUNT),--min-stratum-count $(MIN_STRATUM_COUNT),)
OUTPUT_DIR_FLAG = $(if $(OUTPUT_DIR),--output-dir $(OUTPUT_DIR),)
USE_SWA_FLAG = $(if $(USE_SWA),--use-swa,)
# Pretrain-init flag: enabled by default (checkpoint overridable via INIT_PRETRAIN),
# disabled entirely with NO_PRETRAIN=1.
INIT_PRETRAIN_FLAG = $(if $(NO_PRETRAIN),,--init-from-pretrain $(or $(INIT_PRETRAIN),models/pretrain_delivery.pt))

#################################################################################
# ENVIRONMENT & CODE QUALITY                                                    #
#################################################################################

## Install Python dependencies
.PHONY: requirements
requirements:
	pixi install

## Set up Python interpreter environment
.PHONY: create_environment
create_environment:
	@echo ">>> Pixi environment will be created when running 'make requirements'"
	@echo ">>> Activate with:\npixi shell"

## Delete all compiled Python files
.PHONY: clean
clean:
	find . -type f -name "*.py[co]" -delete
	find . -type d -name "__pycache__" -delete

## Lint using ruff (use `make format` to do formatting)
.PHONY: lint
lint:
	ruff format --check
	ruff check

## Format source code with ruff
.PHONY: format
format:
	ruff check --fix
	ruff format

#################################################################################
# DATA PROCESSING                                                               #
#################################################################################

## Preprocess internal data (raw -> interim)
.PHONY: preprocess
preprocess: requirements
	$(PYTHON_INTERPRETER) scripts/preprocess_internal.py

## Process dataset (interim -> processed)
.PHONY: data
data: requirements
	$(PYTHON_INTERPRETER) scripts/process_data.py

## Process dataset for final training (interim -> processed/final, train:val=9:1, no test)
.PHONY: data_final
data_final: requirements
	$(PYTHON_INTERPRETER) scripts/process_data_final.py

## Process external data for pretrain (external -> processed)
.PHONY: data_pretrain
data_pretrain: requirements
	$(PYTHON_INTERPRETER) scripts/process_external.py

## Process baseline CV data for benchmark (external/all_amine_split_for_LiON -> processed/benchmark)
.PHONY: data_benchmark
data_benchmark: requirements
	$(PYTHON_INTERPRETER) scripts/process_benchmark_data.py

## Process internal data with CV splitting (interim -> processed/cv)
## Use SCAFFOLD_SPLIT=1 to enable amine-based scaffold splitting (default: random shuffle)
.PHONY: data_cv
data_cv: requirements
	$(PYTHON_INTERPRETER) scripts/process_data_cv.py $(SCAFFOLD_SPLIT_FLAG)

#################################################################################
# TRAINING                                                                      #
#################################################################################

## Pretrain on external data (delivery only)
.PHONY: pretrain
pretrain: requirements
	$(PYTHON_INTERPRETER) -m lnp_ml.modeling.pretrain main $(MPNN_FLAG) $(DEVICE_FLAG)

## Benchmark on baseline CV data: 5-fold train + test (delivery only)
.PHONY: benchmark
benchmark: requirements
	$(PYTHON_INTERPRETER) -m lnp_ml.modeling.benchmark main $(MPNN_FLAG) $(DEVICE_FLAG)
	$(PYTHON_INTERPRETER) -m lnp_ml.modeling.benchmark test $(DEVICE_FLAG)

## Train model (multi-task, from scratch)
.PHONY: train
train: requirements
	$(PYTHON_INTERPRETER) -m lnp_ml.modeling.train $(MPNN_FLAG) $(DEVICE_FLAG)

## Finetune from pretrained checkpoint (use FREEZE_BACKBONE=1 to freeze backbone)
.PHONY: finetune
finetune: requirements
	$(PYTHON_INTERPRETER) -m lnp_ml.modeling.train --init-from-pretrain models/pretrain_delivery.pt $(FREEZE_FLAG) $(MPNN_FLAG) $(DEVICE_FLAG)

## Final training using all data (train:val=9:1, no test set), with pretrained weights
.PHONY: train_final
train_final: requirements
	$(PYTHON_INTERPRETER) -m lnp_ml.modeling.train \
		--train-path data/processed/final/train.parquet \
		--val-path data/processed/final/val.parquet \
		--output-dir models/final \
		--init-from-pretrain models/pretrain_delivery.pt \
		$(FREEZE_FLAG) $(MPNN_FLAG) $(DEVICE_FLAG)

## Train with cross-validation on internal data only (5-fold, amine-based split)
.PHONY: train_cv
train_cv: requirements
	$(PYTHON_INTERPRETER) -m lnp_ml.modeling.train_cv main $(FREEZE_FLAG) $(MPNN_FLAG) $(DEVICE_FLAG)

## Finetune with cross-validation on internal data (5-fold) with pretrained weights
.PHONY: finetune_cv
finetune_cv: requirements
	$(PYTHON_INTERPRETER) -m lnp_ml.modeling.train_cv main --init-from-pretrain models/pretrain_delivery.pt $(FREEZE_FLAG) $(MPNN_FLAG) $(DEVICE_FLAG)

#################################################################################
# EVALUATION                                                                    #
#################################################################################

## Evaluate pretrain model (delivery metrics)
.PHONY: test_pretrain
test_pretrain: requirements
	$(PYTHON_INTERPRETER) -m lnp_ml.modeling.pretrain test $(MPNN_FLAG) $(DEVICE_FLAG)

## Evaluate CV finetuned models on test sets (auto-detects MPNN from checkpoint)
.PHONY: test_cv
test_cv: requirements
	$(PYTHON_INTERPRETER) -m lnp_ml.modeling.train_cv test $(DEVICE_FLAG)

## Test model on test set (with detailed metrics, auto-detects MPNN from checkpoint)
.PHONY: test
test: requirements
	$(PYTHON_INTERPRETER) -m lnp_ml.modeling.predict test $(DEVICE_FLAG)

## Run predictions
.PHONY: predict
predict: requirements
	$(PYTHON_INTERPRETER) -m lnp_ml.modeling.predict $(DEVICE_FLAG)

#################################################################################
# HYPERPARAMETER TUNING                                                         #
#################################################################################

# Common parameters:
#   SEED               random seed (default: 42)
#   N_TRIALS           number of Optuna trials (default: 20)
#   EPOCHS_PER_TRIAL   max epochs per trial (default: 30)
#   MIN_STRATUM_COUNT  minimum sample count per composite stratification label (default: 5)
#   OUTPUT_DIR         output directory (default depends on the command)
#   INIT_PRETRAIN      path to pretrained weights (default: models/pretrain_delivery.pt)
#   NO_PRETRAIN=1      disable pretrained weights

## Train with hyperparameter tuning
.PHONY: tune
tune: requirements
	$(PYTHON_INTERPRETER) -m lnp_ml.modeling.train --tune $(MPNN_FLAG) $(DEVICE_FLAG)

## Nested CV with Optuna: outer 5-fold (test) + inner 3-fold (tune)
## For model evaluation: the outer 5-fold yields an unbiased performance
## estimate while the inner 3-fold performs the hyperparameter search.
## Example: make nested_cv_tune DEVICE=cuda N_TRIALS=30
.PHONY: nested_cv_tune
nested_cv_tune: requirements
	$(PYTHON_INTERPRETER) -m lnp_ml.modeling.nested_cv_optuna \
		$(DEVICE_FLAG) $(MPNN_FLAG) $(SEED_FLAG) $(INIT_PRETRAIN_FLAG) \
		$(N_TRIALS_FLAG) $(EPOCHS_PER_TRIAL_FLAG) $(MIN_STRATUM_FLAG) $(OUTPUT_DIR_FLAG)

## Final training with Optuna: 3-fold CV tune + full data train
## For the final model: tune on 3-fold CV, then train on the full dataset
## (no early stopping).
## Example: make final_optuna DEVICE=cuda N_TRIALS=30 USE_SWA=1
.PHONY: final_optuna
final_optuna: requirements
	$(PYTHON_INTERPRETER) -m lnp_ml.modeling.final_train_optuna_cv \
		$(DEVICE_FLAG) $(MPNN_FLAG) $(SEED_FLAG) $(INIT_PRETRAIN_FLAG) \
		$(N_TRIALS_FLAG) $(EPOCHS_PER_TRIAL_FLAG) $(MIN_STRATUM_FLAG) $(OUTPUT_DIR_FLAG) $(USE_SWA_FLAG)

#################################################################################
# SERVING & DEPLOYMENT                                                          #
#################################################################################

## Formulation optimization: find optimal LNP formulation for target organ
## Usage: make optimize SMILES="CC(C)..." ORGAN=liver
.PHONY: optimize
optimize: requirements
	$(PYTHON_INTERPRETER) -m app.optimize --smiles "$(SMILES)" --organ $(ORGAN) $(DEVICE_FLAG)

## Start FastAPI backend server (port 8000)
.PHONY: api
api: requirements
	uvicorn app.api:app --host 0.0.0.0 --port 8000 --reload

## Start Streamlit frontend app (port 8501)
.PHONY: webapp
webapp: requirements
	streamlit run app/app.py --server.port 8501

## Start both API and webapp (run in separate terminals)
.PHONY: serve
serve:
	@echo "请在两个终端分别运行:"
	@echo " 终端 1: make api"
	@echo " 终端 2: make webapp"
	@echo ""
	@echo "然后访问: http://localhost:8501"

#################################################################################
# DOCKER                                                                        #
#################################################################################

## Build Docker images
.PHONY: docker-build
docker-build:
	docker compose build

## Start all services with Docker Compose
.PHONY: docker-up
docker-up:
	docker compose up -d

## Stop all Docker services
.PHONY: docker-down
docker-down:
	docker compose down

## View Docker logs
.PHONY: docker-logs
docker-logs:
	docker compose logs -f

## Build and start all services
.PHONY: docker-serve
docker-serve: docker-build docker-up
	@echo ""
	@echo "🚀 服务已启动!"
	@echo " - API: http://localhost:8000"
	@echo " - Web 应用: http://localhost:8501"
	@echo ""
	@echo "查看日志: make docker-logs"
	@echo "停止服务: make docker-down"

## Clean Docker resources (images, volumes, etc.)
.PHONY: docker-clean
docker-clean:
	docker compose down -v --rmi local
	docker system prune -f

#################################################################################
# Self Documenting Commands                                                     #
#################################################################################

.DEFAULT_GOAL := help

# Scans this Makefile for `## comment` lines immediately preceding a target and
# prints them as a help listing. The trailing backslashes inside the define are
# kept by make and act as shell line-continuations when expanded in the recipe.
define PRINT_HELP_PYSCRIPT
import re, sys; \
lines = '\n'.join([line for line in sys.stdin]); \
matches = re.findall(r'\n## (.*)\n[\s\S]+?\n([a-zA-Z_-]+):', lines); \
print('Available rules:\n'); \
print('\n'.join(['{:25}{}'.format(*reversed(match)) for match in matches]))
endef
export PRINT_HELP_PYSCRIPT

## Show this help message
.PHONY: help
help:
	@$(PYTHON_INTERPRETER) -c "${PRINT_HELP_PYSCRIPT}" < $(MAKEFILE_LIST)