From 72c292a91ffff36090e44ccb6d7aec27f0560be1 Mon Sep 17 00:00:00 2001 From: RYDE-WORK Date: Thu, 26 Feb 2026 18:06:07 +0800 Subject: [PATCH] =?UTF-8?q?=E6=95=B4=E7=90=86Makefile=E7=BB=93=E6=9E=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Makefile | 151 +++++++++++++++++++++++++------------------------------ 1 file changed, 68 insertions(+), 83 deletions(-) diff --git a/Makefile b/Makefile index 96341d0..84fba2e 100644 --- a/Makefile +++ b/Makefile @@ -6,18 +6,33 @@ PROJECT_NAME = lnp-ml PYTHON_VERSION = 3.8 PYTHON_INTERPRETER = python -################################################################################# -# COMMANDS # -################################################################################# +# --- CLI flag 变量 --- +MPNN_FLAG = $(if $(USE_MPNN),--use-mpnn,) +FREEZE_FLAG = $(if $(FREEZE_BACKBONE),--freeze-backbone,) +DEVICE_FLAG = $(if $(DEVICE),--device $(DEVICE),) +SCAFFOLD_SPLIT_FLAG = $(if $(filter 1,$(SCAFFOLD_SPLIT)),--scaffold-split,) +SEED_FLAG = $(if $(SEED),--seed $(SEED),) +N_TRIALS_FLAG = $(if $(N_TRIALS),--n-trials $(N_TRIALS),) +EPOCHS_PER_TRIAL_FLAG = $(if $(EPOCHS_PER_TRIAL),--epochs-per-trial $(EPOCHS_PER_TRIAL),) +MIN_STRATUM_FLAG = $(if $(MIN_STRATUM_COUNT),--min-stratum-count $(MIN_STRATUM_COUNT),) +OUTPUT_DIR_FLAG = $(if $(OUTPUT_DIR),--output-dir $(OUTPUT_DIR),) +USE_SWA_FLAG = $(if $(USE_SWA),--use-swa,) +INIT_PRETRAIN_FLAG = $(if $(NO_PRETRAIN),,--init-from-pretrain $(or $(INIT_PRETRAIN),models/pretrain_delivery.pt)) +################################################################################# +# ENVIRONMENT & CODE QUALITY # +################################################################################# ## Install Python dependencies .PHONY: requirements requirements: pixi install - - +## Set up Python interpreter environment +.PHONY: create_environment +create_environment: + @echo ">>> Pixi environment will be created when running 'make requirements'" + @echo ">>> Activate with:\npixi shell" ## Delete all compiled Python files .PHONY: clean @@ -25,7 +40,6 @@ clean: find . -type f -name "*.py[co]" -delete find . -type d -name "__pycache__" -delete - ## Lint using ruff (use `make format` to do formatting) .PHONY: lint lint: @@ -38,26 +52,10 @@ format: ruff check --fix ruff format - - - - -## Set up Python interpreter environment -.PHONY: create_environment -create_environment: - - @echo ">>> Pixi environment will be created when running 'make requirements'" - - @echo ">>> Activate with:\npixi shell" - - - - ################################################################################# -# PROJECT RULES # +# DATA PROCESSING # ################################################################################# - ## Preprocess internal data (raw -> interim) .PHONY: preprocess preprocess: requirements @@ -85,44 +83,24 @@ data_pretrain_cv: requirements ## Process internal data with CV splitting (interim -> processed/cv) ## Use SCAFFOLD_SPLIT=1 to enable amine-based scaffold splitting (default: random shuffle) -SCAFFOLD_SPLIT_FLAG = $(if $(filter 1,$(SCAFFOLD_SPLIT)),--scaffold-split,) - .PHONY: data_cv data_cv: requirements $(PYTHON_INTERPRETER) scripts/process_data_cv.py $(SCAFFOLD_SPLIT_FLAG) -# MPNN 支持:使用 USE_MPNN=1 启用 MPNN encoder -# 例如:make pretrain USE_MPNN=1 -MPNN_FLAG = $(if $(USE_MPNN),--use-mpnn,) - -# Backbone 冻结:使用 FREEZE_BACKBONE=1 冻结 backbone,只训练 heads -# 例如:make finetune FREEZE_BACKBONE=1 -FREEZE_FLAG = $(if $(FREEZE_BACKBONE),--freeze-backbone,) - -# 设备选择:使用 DEVICE=xxx 指定设备 -# 例如:make train DEVICE=cuda:0 或 make test_cv DEVICE=mps -DEVICE_FLAG = $(if $(DEVICE),--device $(DEVICE),) +################################################################################# +# TRAINING # +################################################################################# ## Pretrain on external data (delivery only) .PHONY: pretrain pretrain: requirements $(PYTHON_INTERPRETER) -m lnp_ml.modeling.pretrain main $(MPNN_FLAG) $(DEVICE_FLAG) -## Evaluate pretrain model (delivery metrics) -.PHONY: test_pretrain -test_pretrain: requirements - $(PYTHON_INTERPRETER) -m lnp_ml.modeling.pretrain test $(MPNN_FLAG) $(DEVICE_FLAG) - ## Pretrain with cross-validation (5-fold) .PHONY: pretrain_cv pretrain_cv: requirements $(PYTHON_INTERPRETER) -m lnp_ml.modeling.pretrain_cv main $(MPNN_FLAG) $(DEVICE_FLAG) -## Evaluate CV pretrain models on test sets (auto-detects MPNN from checkpoint) -.PHONY: test_pretrain_cv -test_pretrain_cv: requirements - $(PYTHON_INTERPRETER) -m lnp_ml.modeling.pretrain_cv test $(DEVICE_FLAG) - ## Train model (multi-task, from scratch) .PHONY: train train: requirements @@ -143,48 +121,64 @@ train_final: requirements --init-from-pretrain models/pretrain_delivery.pt \ $(FREEZE_FLAG) $(MPNN_FLAG) $(DEVICE_FLAG) -## Finetune with cross-validation on internal data (5-fold, amine-based split) with pretrained weights -.PHONY: finetune_cv -finetune_cv: requirements - $(PYTHON_INTERPRETER) -m lnp_ml.modeling.train_cv main --init-from-pretrain models/pretrain_delivery.pt $(FREEZE_FLAG) $(MPNN_FLAG) $(DEVICE_FLAG) - ## Train with cross-validation on internal data only (5-fold, amine-based split) .PHONY: train_cv train_cv: requirements $(PYTHON_INTERPRETER) -m lnp_ml.modeling.train_cv main $(FREEZE_FLAG) $(MPNN_FLAG) $(DEVICE_FLAG) +## Finetune with cross-validation on internal data (5-fold) with pretrained weights +.PHONY: finetune_cv +finetune_cv: requirements + $(PYTHON_INTERPRETER) -m lnp_ml.modeling.train_cv main --init-from-pretrain models/pretrain_delivery.pt $(FREEZE_FLAG) $(MPNN_FLAG) $(DEVICE_FLAG) + +################################################################################# +# EVALUATION # +################################################################################# + +## Evaluate pretrain model (delivery metrics) +.PHONY: test_pretrain +test_pretrain: requirements + $(PYTHON_INTERPRETER) -m lnp_ml.modeling.pretrain test $(MPNN_FLAG) $(DEVICE_FLAG) + +## Evaluate CV pretrain models on test sets (auto-detects MPNN from checkpoint) +.PHONY: test_pretrain_cv +test_pretrain_cv: requirements + $(PYTHON_INTERPRETER) -m lnp_ml.modeling.pretrain_cv test $(DEVICE_FLAG) ## Evaluate CV finetuned models on test sets (auto-detects MPNN from checkpoint) .PHONY: test_cv test_cv: requirements $(PYTHON_INTERPRETER) -m lnp_ml.modeling.train_cv test $(DEVICE_FLAG) +## Test model on test set (with detailed metrics, auto-detects MPNN from checkpoint) +.PHONY: test +test: requirements + $(PYTHON_INTERPRETER) -m lnp_ml.modeling.predict test $(DEVICE_FLAG) + +## Run predictions +.PHONY: predict +predict: requirements + $(PYTHON_INTERPRETER) -m lnp_ml.modeling.predict $(DEVICE_FLAG) + +################################################################################# +# HYPERPARAMETER TUNING # +################################################################################# +# 通用参数: +# SEED 随机种子 (默认: 42) +# N_TRIALS Optuna 试验数 (默认: 20) +# EPOCHS_PER_TRIAL 每个试验的最大 epoch (默认: 30) +# MIN_STRATUM_COUNT 复合分层标签的最小样本数 (默认: 5) +# OUTPUT_DIR 输出目录 (根据命令有不同默认值) +# INIT_PRETRAIN 预训练权重路径 (默认: models/pretrain_delivery.pt) +# NO_PRETRAIN=1 禁用预训练权重 + ## Train with hyperparameter tuning .PHONY: tune tune: requirements $(PYTHON_INTERPRETER) -m lnp_ml.modeling.train --tune $(MPNN_FLAG) $(DEVICE_FLAG) -# ============ 嵌套 CV + Optuna 调参(StratifiedKFold + 类权重) ============ -# 通用参数: -# SEED: 随机种子 (默认: 42) -# N_TRIALS: Optuna 试验数 (默认: 20) -# EPOCHS_PER_TRIAL: 每个试验的最大 epoch (默认: 30) -# MIN_STRATUM_COUNT: 复合分层标签的最小样本数 (默认: 5) -# OUTPUT_DIR: 输出目录 (根据命令有不同默认值) -# INIT_PRETRAIN: 预训练权重路径 (默认: models/pretrain_delivery.pt) - -SEED_FLAG = $(if $(SEED),--seed $(SEED),) -N_TRIALS_FLAG = $(if $(N_TRIALS),--n-trials $(N_TRIALS),) -EPOCHS_PER_TRIAL_FLAG = $(if $(EPOCHS_PER_TRIAL),--epochs-per-trial $(EPOCHS_PER_TRIAL),) -MIN_STRATUM_FLAG = $(if $(MIN_STRATUM_COUNT),--min-stratum-count $(MIN_STRATUM_COUNT),) -OUTPUT_DIR_FLAG = $(if $(OUTPUT_DIR),--output-dir $(OUTPUT_DIR),) -USE_SWA_FLAG = $(if $(USE_SWA),--use-swa,) -# 默认使用预训练权重,设置 NO_PRETRAIN=1 可禁用 -INIT_PRETRAIN_FLAG = $(if $(NO_PRETRAIN),,--init-from-pretrain $(or $(INIT_PRETRAIN),models/pretrain_delivery.pt)) - ## Nested CV with Optuna: outer 5-fold (test) + inner 3-fold (tune) ## 用于模型评估:外层 5-fold 产生无偏性能估计,内层 3-fold 做超参搜索 -## 默认加载 models/pretrain_delivery.pt 预训练权重,使用 NO_PRETRAIN=1 禁用 ## 使用示例: make nested_cv_tune DEVICE=cuda N_TRIALS=30 .PHONY: nested_cv_tune nested_cv_tune: requirements @@ -194,7 +188,6 @@ nested_cv_tune: requirements ## Final training with Optuna: 3-fold CV tune + full data train ## 用于最终模型训练:3-fold 调参后用全量数据训练(无 early-stop) -## 默认加载 models/pretrain_delivery.pt 预训练权重,使用 NO_PRETRAIN=1 禁用 ## 使用示例: make final_optuna DEVICE=cuda N_TRIALS=30 USE_SWA=1 .PHONY: final_optuna final_optuna: requirements @@ -202,15 +195,9 @@ final_optuna: requirements $(DEVICE_FLAG) $(MPNN_FLAG) $(SEED_FLAG) $(INIT_PRETRAIN_FLAG) \ $(N_TRIALS_FLAG) $(EPOCHS_PER_TRIAL_FLAG) $(MIN_STRATUM_FLAG) $(OUTPUT_DIR_FLAG) $(USE_SWA_FLAG) -## Run predictions -.PHONY: predict -predict: requirements - $(PYTHON_INTERPRETER) -m lnp_ml.modeling.predict $(DEVICE_FLAG) - -## Test model on test set (with detailed metrics, auto-detects MPNN from checkpoint) -.PHONY: test -test: requirements - $(PYTHON_INTERPRETER) -m lnp_ml.modeling.predict test $(DEVICE_FLAG) +################################################################################# +# SERVING & DEPLOYMENT # +################################################################################# ## Formulation optimization: find optimal LNP formulation for target organ ## Usage: make optimize SMILES="CC(C)..." ORGAN=liver @@ -237,9 +224,8 @@ serve: @echo "" @echo "然后访问: http://localhost:8501" - ################################################################################# -# DOCKER COMMANDS # +# DOCKER # ################################################################################# ## Build Docker images @@ -279,7 +265,6 @@ docker-clean: docker compose down -v --rmi local docker system prune -f - ################################################################################# # Self Documenting Commands # #################################################################################