mirror of
https://github.com/RYDE-WORK/lnp_ml.git
synced 2026-03-21 09:36:32 +08:00
整理Makefile结构
This commit is contained in:
parent
0e917ef0d4
commit
72c292a91f
151
Makefile
151
Makefile
@ -6,18 +6,33 @@ PROJECT_NAME = lnp-ml
|
|||||||
PYTHON_VERSION = 3.8
|
PYTHON_VERSION = 3.8
|
||||||
PYTHON_INTERPRETER = python
|
PYTHON_INTERPRETER = python
|
||||||
|
|
||||||
#################################################################################
|
# --- CLI flag 变量 ---
|
||||||
# COMMANDS #
|
MPNN_FLAG = $(if $(USE_MPNN),--use-mpnn,)
|
||||||
#################################################################################
|
FREEZE_FLAG = $(if $(FREEZE_BACKBONE),--freeze-backbone,)
|
||||||
|
DEVICE_FLAG = $(if $(DEVICE),--device $(DEVICE),)
|
||||||
|
SCAFFOLD_SPLIT_FLAG = $(if $(filter 1,$(SCAFFOLD_SPLIT)),--scaffold-split,)
|
||||||
|
SEED_FLAG = $(if $(SEED),--seed $(SEED),)
|
||||||
|
N_TRIALS_FLAG = $(if $(N_TRIALS),--n-trials $(N_TRIALS),)
|
||||||
|
EPOCHS_PER_TRIAL_FLAG = $(if $(EPOCHS_PER_TRIAL),--epochs-per-trial $(EPOCHS_PER_TRIAL),)
|
||||||
|
MIN_STRATUM_FLAG = $(if $(MIN_STRATUM_COUNT),--min-stratum-count $(MIN_STRATUM_COUNT),)
|
||||||
|
OUTPUT_DIR_FLAG = $(if $(OUTPUT_DIR),--output-dir $(OUTPUT_DIR),)
|
||||||
|
USE_SWA_FLAG = $(if $(USE_SWA),--use-swa,)
|
||||||
|
INIT_PRETRAIN_FLAG = $(if $(NO_PRETRAIN),,--init-from-pretrain $(or $(INIT_PRETRAIN),models/pretrain_delivery.pt))
|
||||||
|
|
||||||
|
#################################################################################
|
||||||
|
# ENVIRONMENT & CODE QUALITY #
|
||||||
|
#################################################################################
|
||||||
|
|
||||||
## Install Python dependencies
|
## Install Python dependencies
|
||||||
.PHONY: requirements
|
.PHONY: requirements
|
||||||
requirements:
|
requirements:
|
||||||
pixi install
|
pixi install
|
||||||
|
|
||||||
|
## Set up Python interpreter environment
|
||||||
|
.PHONY: create_environment
|
||||||
|
create_environment:
|
||||||
|
@echo ">>> Pixi environment will be created when running 'make requirements'"
|
||||||
|
@echo ">>> Activate with:\npixi shell"
|
||||||
|
|
||||||
## Delete all compiled Python files
|
## Delete all compiled Python files
|
||||||
.PHONY: clean
|
.PHONY: clean
|
||||||
@ -25,7 +40,6 @@ clean:
|
|||||||
find . -type f -name "*.py[co]" -delete
|
find . -type f -name "*.py[co]" -delete
|
||||||
find . -type d -name "__pycache__" -delete
|
find . -type d -name "__pycache__" -delete
|
||||||
|
|
||||||
|
|
||||||
## Lint using ruff (use `make format` to do formatting)
|
## Lint using ruff (use `make format` to do formatting)
|
||||||
.PHONY: lint
|
.PHONY: lint
|
||||||
lint:
|
lint:
|
||||||
@ -38,26 +52,10 @@ format:
|
|||||||
ruff check --fix
|
ruff check --fix
|
||||||
ruff format
|
ruff format
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## Set up Python interpreter environment
|
|
||||||
.PHONY: create_environment
|
|
||||||
create_environment:
|
|
||||||
|
|
||||||
@echo ">>> Pixi environment will be created when running 'make requirements'"
|
|
||||||
|
|
||||||
@echo ">>> Activate with:\npixi shell"
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#################################################################################
|
#################################################################################
|
||||||
# PROJECT RULES #
|
# DATA PROCESSING #
|
||||||
#################################################################################
|
#################################################################################
|
||||||
|
|
||||||
|
|
||||||
## Preprocess internal data (raw -> interim)
|
## Preprocess internal data (raw -> interim)
|
||||||
.PHONY: preprocess
|
.PHONY: preprocess
|
||||||
preprocess: requirements
|
preprocess: requirements
|
||||||
@ -85,44 +83,24 @@ data_pretrain_cv: requirements
|
|||||||
|
|
||||||
## Process internal data with CV splitting (interim -> processed/cv)
|
## Process internal data with CV splitting (interim -> processed/cv)
|
||||||
## Use SCAFFOLD_SPLIT=1 to enable amine-based scaffold splitting (default: random shuffle)
|
## Use SCAFFOLD_SPLIT=1 to enable amine-based scaffold splitting (default: random shuffle)
|
||||||
SCAFFOLD_SPLIT_FLAG = $(if $(filter 1,$(SCAFFOLD_SPLIT)),--scaffold-split,)
|
|
||||||
|
|
||||||
.PHONY: data_cv
|
.PHONY: data_cv
|
||||||
data_cv: requirements
|
data_cv: requirements
|
||||||
$(PYTHON_INTERPRETER) scripts/process_data_cv.py $(SCAFFOLD_SPLIT_FLAG)
|
$(PYTHON_INTERPRETER) scripts/process_data_cv.py $(SCAFFOLD_SPLIT_FLAG)
|
||||||
|
|
||||||
# MPNN 支持:使用 USE_MPNN=1 启用 MPNN encoder
|
#################################################################################
|
||||||
# 例如:make pretrain USE_MPNN=1
|
# TRAINING #
|
||||||
MPNN_FLAG = $(if $(USE_MPNN),--use-mpnn,)
|
#################################################################################
|
||||||
|
|
||||||
# Backbone 冻结:使用 FREEZE_BACKBONE=1 冻结 backbone,只训练 heads
|
|
||||||
# 例如:make finetune FREEZE_BACKBONE=1
|
|
||||||
FREEZE_FLAG = $(if $(FREEZE_BACKBONE),--freeze-backbone,)
|
|
||||||
|
|
||||||
# 设备选择:使用 DEVICE=xxx 指定设备
|
|
||||||
# 例如:make train DEVICE=cuda:0 或 make test_cv DEVICE=mps
|
|
||||||
DEVICE_FLAG = $(if $(DEVICE),--device $(DEVICE),)
|
|
||||||
|
|
||||||
## Pretrain on external data (delivery only)
|
## Pretrain on external data (delivery only)
|
||||||
.PHONY: pretrain
|
.PHONY: pretrain
|
||||||
pretrain: requirements
|
pretrain: requirements
|
||||||
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.pretrain main $(MPNN_FLAG) $(DEVICE_FLAG)
|
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.pretrain main $(MPNN_FLAG) $(DEVICE_FLAG)
|
||||||
|
|
||||||
## Evaluate pretrain model (delivery metrics)
|
|
||||||
.PHONY: test_pretrain
|
|
||||||
test_pretrain: requirements
|
|
||||||
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.pretrain test $(MPNN_FLAG) $(DEVICE_FLAG)
|
|
||||||
|
|
||||||
## Pretrain with cross-validation (5-fold)
|
## Pretrain with cross-validation (5-fold)
|
||||||
.PHONY: pretrain_cv
|
.PHONY: pretrain_cv
|
||||||
pretrain_cv: requirements
|
pretrain_cv: requirements
|
||||||
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.pretrain_cv main $(MPNN_FLAG) $(DEVICE_FLAG)
|
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.pretrain_cv main $(MPNN_FLAG) $(DEVICE_FLAG)
|
||||||
|
|
||||||
## Evaluate CV pretrain models on test sets (auto-detects MPNN from checkpoint)
|
|
||||||
.PHONY: test_pretrain_cv
|
|
||||||
test_pretrain_cv: requirements
|
|
||||||
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.pretrain_cv test $(DEVICE_FLAG)
|
|
||||||
|
|
||||||
## Train model (multi-task, from scratch)
|
## Train model (multi-task, from scratch)
|
||||||
.PHONY: train
|
.PHONY: train
|
||||||
train: requirements
|
train: requirements
|
||||||
@ -143,48 +121,64 @@ train_final: requirements
|
|||||||
--init-from-pretrain models/pretrain_delivery.pt \
|
--init-from-pretrain models/pretrain_delivery.pt \
|
||||||
$(FREEZE_FLAG) $(MPNN_FLAG) $(DEVICE_FLAG)
|
$(FREEZE_FLAG) $(MPNN_FLAG) $(DEVICE_FLAG)
|
||||||
|
|
||||||
## Finetune with cross-validation on internal data (5-fold, amine-based split) with pretrained weights
|
|
||||||
.PHONY: finetune_cv
|
|
||||||
finetune_cv: requirements
|
|
||||||
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.train_cv main --init-from-pretrain models/pretrain_delivery.pt $(FREEZE_FLAG) $(MPNN_FLAG) $(DEVICE_FLAG)
|
|
||||||
|
|
||||||
## Train with cross-validation on internal data only (5-fold, amine-based split)
|
## Train with cross-validation on internal data only (5-fold, amine-based split)
|
||||||
.PHONY: train_cv
|
.PHONY: train_cv
|
||||||
train_cv: requirements
|
train_cv: requirements
|
||||||
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.train_cv main $(FREEZE_FLAG) $(MPNN_FLAG) $(DEVICE_FLAG)
|
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.train_cv main $(FREEZE_FLAG) $(MPNN_FLAG) $(DEVICE_FLAG)
|
||||||
|
|
||||||
|
## Finetune with cross-validation on internal data (5-fold) with pretrained weights
|
||||||
|
.PHONY: finetune_cv
|
||||||
|
finetune_cv: requirements
|
||||||
|
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.train_cv main --init-from-pretrain models/pretrain_delivery.pt $(FREEZE_FLAG) $(MPNN_FLAG) $(DEVICE_FLAG)
|
||||||
|
|
||||||
|
#################################################################################
|
||||||
|
# EVALUATION #
|
||||||
|
#################################################################################
|
||||||
|
|
||||||
|
## Evaluate pretrain model (delivery metrics)
|
||||||
|
.PHONY: test_pretrain
|
||||||
|
test_pretrain: requirements
|
||||||
|
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.pretrain test $(MPNN_FLAG) $(DEVICE_FLAG)
|
||||||
|
|
||||||
|
## Evaluate CV pretrain models on test sets (auto-detects MPNN from checkpoint)
|
||||||
|
.PHONY: test_pretrain_cv
|
||||||
|
test_pretrain_cv: requirements
|
||||||
|
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.pretrain_cv test $(DEVICE_FLAG)
|
||||||
|
|
||||||
## Evaluate CV finetuned models on test sets (auto-detects MPNN from checkpoint)
|
## Evaluate CV finetuned models on test sets (auto-detects MPNN from checkpoint)
|
||||||
.PHONY: test_cv
|
.PHONY: test_cv
|
||||||
test_cv: requirements
|
test_cv: requirements
|
||||||
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.train_cv test $(DEVICE_FLAG)
|
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.train_cv test $(DEVICE_FLAG)
|
||||||
|
|
||||||
|
## Test model on test set (with detailed metrics, auto-detects MPNN from checkpoint)
|
||||||
|
.PHONY: test
|
||||||
|
test: requirements
|
||||||
|
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.predict test $(DEVICE_FLAG)
|
||||||
|
|
||||||
|
## Run predictions
|
||||||
|
.PHONY: predict
|
||||||
|
predict: requirements
|
||||||
|
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.predict $(DEVICE_FLAG)
|
||||||
|
|
||||||
|
#################################################################################
|
||||||
|
# HYPERPARAMETER TUNING #
|
||||||
|
#################################################################################
|
||||||
|
# 通用参数:
|
||||||
|
# SEED 随机种子 (默认: 42)
|
||||||
|
# N_TRIALS Optuna 试验数 (默认: 20)
|
||||||
|
# EPOCHS_PER_TRIAL 每个试验的最大 epoch (默认: 30)
|
||||||
|
# MIN_STRATUM_COUNT 复合分层标签的最小样本数 (默认: 5)
|
||||||
|
# OUTPUT_DIR 输出目录 (根据命令有不同默认值)
|
||||||
|
# INIT_PRETRAIN 预训练权重路径 (默认: models/pretrain_delivery.pt)
|
||||||
|
# NO_PRETRAIN=1 禁用预训练权重
|
||||||
|
|
||||||
## Train with hyperparameter tuning
|
## Train with hyperparameter tuning
|
||||||
.PHONY: tune
|
.PHONY: tune
|
||||||
tune: requirements
|
tune: requirements
|
||||||
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.train --tune $(MPNN_FLAG) $(DEVICE_FLAG)
|
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.train --tune $(MPNN_FLAG) $(DEVICE_FLAG)
|
||||||
|
|
||||||
# ============ 嵌套 CV + Optuna 调参(StratifiedKFold + 类权重) ============
|
|
||||||
# 通用参数:
|
|
||||||
# SEED: 随机种子 (默认: 42)
|
|
||||||
# N_TRIALS: Optuna 试验数 (默认: 20)
|
|
||||||
# EPOCHS_PER_TRIAL: 每个试验的最大 epoch (默认: 30)
|
|
||||||
# MIN_STRATUM_COUNT: 复合分层标签的最小样本数 (默认: 5)
|
|
||||||
# OUTPUT_DIR: 输出目录 (根据命令有不同默认值)
|
|
||||||
# INIT_PRETRAIN: 预训练权重路径 (默认: models/pretrain_delivery.pt)
|
|
||||||
|
|
||||||
SEED_FLAG = $(if $(SEED),--seed $(SEED),)
|
|
||||||
N_TRIALS_FLAG = $(if $(N_TRIALS),--n-trials $(N_TRIALS),)
|
|
||||||
EPOCHS_PER_TRIAL_FLAG = $(if $(EPOCHS_PER_TRIAL),--epochs-per-trial $(EPOCHS_PER_TRIAL),)
|
|
||||||
MIN_STRATUM_FLAG = $(if $(MIN_STRATUM_COUNT),--min-stratum-count $(MIN_STRATUM_COUNT),)
|
|
||||||
OUTPUT_DIR_FLAG = $(if $(OUTPUT_DIR),--output-dir $(OUTPUT_DIR),)
|
|
||||||
USE_SWA_FLAG = $(if $(USE_SWA),--use-swa,)
|
|
||||||
# 默认使用预训练权重,设置 NO_PRETRAIN=1 可禁用
|
|
||||||
INIT_PRETRAIN_FLAG = $(if $(NO_PRETRAIN),,--init-from-pretrain $(or $(INIT_PRETRAIN),models/pretrain_delivery.pt))
|
|
||||||
|
|
||||||
## Nested CV with Optuna: outer 5-fold (test) + inner 3-fold (tune)
|
## Nested CV with Optuna: outer 5-fold (test) + inner 3-fold (tune)
|
||||||
## 用于模型评估:外层 5-fold 产生无偏性能估计,内层 3-fold 做超参搜索
|
## 用于模型评估:外层 5-fold 产生无偏性能估计,内层 3-fold 做超参搜索
|
||||||
## 默认加载 models/pretrain_delivery.pt 预训练权重,使用 NO_PRETRAIN=1 禁用
|
|
||||||
## 使用示例: make nested_cv_tune DEVICE=cuda N_TRIALS=30
|
## 使用示例: make nested_cv_tune DEVICE=cuda N_TRIALS=30
|
||||||
.PHONY: nested_cv_tune
|
.PHONY: nested_cv_tune
|
||||||
nested_cv_tune: requirements
|
nested_cv_tune: requirements
|
||||||
@ -194,7 +188,6 @@ nested_cv_tune: requirements
|
|||||||
|
|
||||||
## Final training with Optuna: 3-fold CV tune + full data train
|
## Final training with Optuna: 3-fold CV tune + full data train
|
||||||
## 用于最终模型训练:3-fold 调参后用全量数据训练(无 early-stop)
|
## 用于最终模型训练:3-fold 调参后用全量数据训练(无 early-stop)
|
||||||
## 默认加载 models/pretrain_delivery.pt 预训练权重,使用 NO_PRETRAIN=1 禁用
|
|
||||||
## 使用示例: make final_optuna DEVICE=cuda N_TRIALS=30 USE_SWA=1
|
## 使用示例: make final_optuna DEVICE=cuda N_TRIALS=30 USE_SWA=1
|
||||||
.PHONY: final_optuna
|
.PHONY: final_optuna
|
||||||
final_optuna: requirements
|
final_optuna: requirements
|
||||||
@ -202,15 +195,9 @@ final_optuna: requirements
|
|||||||
$(DEVICE_FLAG) $(MPNN_FLAG) $(SEED_FLAG) $(INIT_PRETRAIN_FLAG) \
|
$(DEVICE_FLAG) $(MPNN_FLAG) $(SEED_FLAG) $(INIT_PRETRAIN_FLAG) \
|
||||||
$(N_TRIALS_FLAG) $(EPOCHS_PER_TRIAL_FLAG) $(MIN_STRATUM_FLAG) $(OUTPUT_DIR_FLAG) $(USE_SWA_FLAG)
|
$(N_TRIALS_FLAG) $(EPOCHS_PER_TRIAL_FLAG) $(MIN_STRATUM_FLAG) $(OUTPUT_DIR_FLAG) $(USE_SWA_FLAG)
|
||||||
|
|
||||||
## Run predictions
|
#################################################################################
|
||||||
.PHONY: predict
|
# SERVING & DEPLOYMENT #
|
||||||
predict: requirements
|
#################################################################################
|
||||||
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.predict $(DEVICE_FLAG)
|
|
||||||
|
|
||||||
## Test model on test set (with detailed metrics, auto-detects MPNN from checkpoint)
|
|
||||||
.PHONY: test
|
|
||||||
test: requirements
|
|
||||||
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.predict test $(DEVICE_FLAG)
|
|
||||||
|
|
||||||
## Formulation optimization: find optimal LNP formulation for target organ
|
## Formulation optimization: find optimal LNP formulation for target organ
|
||||||
## Usage: make optimize SMILES="CC(C)..." ORGAN=liver
|
## Usage: make optimize SMILES="CC(C)..." ORGAN=liver
|
||||||
@ -237,9 +224,8 @@ serve:
|
|||||||
@echo ""
|
@echo ""
|
||||||
@echo "然后访问: http://localhost:8501"
|
@echo "然后访问: http://localhost:8501"
|
||||||
|
|
||||||
|
|
||||||
#################################################################################
|
#################################################################################
|
||||||
# DOCKER COMMANDS #
|
# DOCKER #
|
||||||
#################################################################################
|
#################################################################################
|
||||||
|
|
||||||
## Build Docker images
|
## Build Docker images
|
||||||
@ -279,7 +265,6 @@ docker-clean:
|
|||||||
docker compose down -v --rmi local
|
docker compose down -v --rmi local
|
||||||
docker system prune -f
|
docker system prune -f
|
||||||
|
|
||||||
|
|
||||||
#################################################################################
|
#################################################################################
|
||||||
# Self Documenting Commands #
|
# Self Documenting Commands #
|
||||||
#################################################################################
|
#################################################################################
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user