# Mirror of https://github.com/RYDE-WORK/lnp_ml.git
# Synced 2026-03-21 01:27:00 +08:00
# Makefile — 244 lines, 9.3 KiB
#################################################################################
# GLOBALS                                                                       #
#################################################################################

PROJECT_NAME := lnp-ml
PYTHON_VERSION := 3.8
PYTHON_INTERPRETER := python

# --- CLI flag variables ---
# Each *_FLAG expands to a command-line option only when the corresponding
# variable is set by the user (e.g. `make train SEED=7 DEVICE=cuda`).
# Simple assignment (:=) evaluates each $(if ...) once at parse time instead of
# re-expanding it on every reference; command-line overrides such as
# `make train SEED=7` are already visible when the makefile is parsed.

# MPNN is on by default; opt out with NO_MPNN=1.
MPNN_FLAG := $(if $(NO_MPNN),,--use-mpnn)
DEVICE_FLAG := $(if $(DEVICE),--device $(DEVICE),)
SEED_FLAG := $(if $(SEED),--seed $(SEED),)
N_TRIALS_FLAG := $(if $(N_TRIALS),--n-trials $(N_TRIALS),)
EPOCHS_PER_TRIAL_FLAG := $(if $(EPOCHS_PER_TRIAL),--epochs-per-trial $(EPOCHS_PER_TRIAL),)
MIN_STRATUM_FLAG := $(if $(MIN_STRATUM_COUNT),--min-stratum-count $(MIN_STRATUM_COUNT),)
OUTPUT_DIR_FLAG := $(if $(OUTPUT_DIR),--output-dir $(OUTPUT_DIR),)
USE_SWA_FLAG := $(if $(USE_SWA),--use-swa,)
PARALLEL_FLAG := $(if $(PARALLEL),--parallel,)
# Pretrained weights are used by default (INIT_PRETRAIN path or the stock
# checkpoint); disable entirely with NO_PRETRAIN=1.
INIT_PRETRAIN_FLAG := $(if $(NO_PRETRAIN),,--init-from-pretrain $(or $(INIT_PRETRAIN),models/pretrain_delivery.pt))
#################################################################################
# ENVIRONMENT & CODE QUALITY                                                    #
#################################################################################

## Install Python dependencies
.PHONY: requirements
requirements:
	pixi install

## Set up Python interpreter environment
.PHONY: create_environment
create_environment:
	@echo ">>> Pixi environment will be created when running 'make requirements'"
	@echo ">>> Activate with:"
	@echo "pixi shell"
## Delete all compiled Python files
.PHONY: clean
clean:
	find . -type f -name "*.py[co]" -delete
	find . -type d -name "__pycache__" -delete

## Lint using ruff (use `make format` to do formatting)
# Check-only: fails if formatting or lint rules are violated, changes nothing.
.PHONY: lint
lint:
	ruff format --check
	ruff check

## Format source code with ruff
# Applies auto-fixable lint rules first, then reformats in place.
.PHONY: format
format:
	ruff check --fix
	ruff format
#################################################################################
# DATA PROCESSING                                                               #
#################################################################################

## Preprocess internal data (raw -> interim)
.PHONY: preprocess
preprocess: requirements
	$(PYTHON_INTERPRETER) scripts/preprocess_internal.py

## Process external data for pretrain (external -> processed)
.PHONY: data_pretrain
data_pretrain: requirements
	$(PYTHON_INTERPRETER) scripts/process_external.py

## Process baseline CV data for benchmark (external/all_amine_split_for_LiON -> processed/benchmark)
.PHONY: data_benchmark
data_benchmark: requirements
	$(PYTHON_INTERPRETER) scripts/process_benchmark_data.py
#################################################################################
# BENCHMARKING                                                                  #
#################################################################################

## Benchmark on baseline CV data: 5-fold train + test (delivery only)
.PHONY: benchmark
benchmark: requirements
	$(PYTHON_INTERPRETER) -m lnp_ml.modeling.benchmark main $(MPNN_FLAG) $(DEVICE_FLAG)
	$(PYTHON_INTERPRETER) -m lnp_ml.modeling.benchmark test $(DEVICE_FLAG)
	# NOTE(review): the test step passes no MPNN flag — presumably the model
	# type is restored from the trained run; confirm against the benchmark CLI.

## Evaluate baseline method on public test splits (test.csv vs preds.csv in cv_*)
.PHONY: baseline
baseline: requirements
	$(PYTHON_INTERPRETER) scripts/evaluate_external.py
#################################################################################
# TRAINING (Nested CV + Optuna)                                                 #
#################################################################################
# Common parameters:
#   SEED               random seed (default: 42)
#   N_TRIALS           number of Optuna trials (default: 20)
#   EPOCHS_PER_TRIAL   max epochs per trial (default: 30)
#   MIN_STRATUM_COUNT  minimum sample count per composite stratum label (default: 5)
#   OUTPUT_DIR         output directory (default depends on the command)
#   INIT_PRETRAIN      pretrained weight path (default: models/pretrain_delivery.pt)
#   NO_PRETRAIN=1      disable pretrained weights
#   USE_SWA=1          enable SWA (final-train stage)
#   PARALLEL=1         run outer folds in parallel (nested-CV stage; needs enough GPU memory)
#
# Usage examples:
#   make pretrain
#   make train DEVICE=cuda N_TRIALS=30 USE_SWA=1 INIT_PRETRAIN=models/pretrain_delivery.pt
#   make train DEVICE=cuda PARALLEL=1

## Pretrain on external data (delivery only)
.PHONY: pretrain
pretrain: requirements
	$(PYTHON_INTERPRETER) -m lnp_ml.modeling.pretrain main $(MPNN_FLAG) $(DEVICE_FLAG)

## Train: nested CV evaluation + final model training
## Step 1: outer 5-fold gives an unbiased performance estimate; inner 3-fold does the hyperparameter search
## Step 2: after 3-fold tuning, the final model is trained on the full dataset
.PHONY: train
train: requirements
	$(PYTHON_INTERPRETER) -m lnp_ml.modeling.nested_cv_optuna \
		$(DEVICE_FLAG) $(MPNN_FLAG) $(SEED_FLAG) $(INIT_PRETRAIN_FLAG) \
		$(N_TRIALS_FLAG) $(EPOCHS_PER_TRIAL_FLAG) $(MIN_STRATUM_FLAG) $(OUTPUT_DIR_FLAG) $(PARALLEL_FLAG)
	$(PYTHON_INTERPRETER) -m lnp_ml.modeling.final_train_optuna_cv \
		$(DEVICE_FLAG) $(MPNN_FLAG) $(SEED_FLAG) $(INIT_PRETRAIN_FLAG) \
		$(N_TRIALS_FLAG) $(EPOCHS_PER_TRIAL_FLAG) $(MIN_STRATUM_FLAG) $(OUTPUT_DIR_FLAG) $(USE_SWA_FLAG)
#################################################################################
# INTERPRETABILITY (biodistribution feature importance)                         #
#################################################################################
# Parameters:
#   ORGAN       organ (lymph_nodes, heart, liver, spleen, lung, kidney, muscle, all; default: all)
#   METHOD      token-level method (ig, ablation, attention, all; default: ig)
#   DESC_IG     also compute IG over internal desc features (1 to enable; off by default)
#   DESC_TOP_K  number of top-K features shown in the visualization (default: 30)
#   DATA        data path (default: data/interim/internal.csv)
#   MODEL       model path (default: models/final/model.pt)

# Simple assignment (:=), consistent with the GLOBALS flag variables: each
# $(if ...) is evaluated once at parse time rather than on every reference.
METHOD_FLAG := $(if $(METHOD),--method $(METHOD),)
DATA_FLAG := $(if $(DATA),--data-path $(DATA),)
MODEL_FLAG := $(if $(MODEL),--model-path $(MODEL),)
ORGAN_FLAG := $(if $(ORGAN),--organ $(ORGAN),)
DESC_IG_FLAG := $(if $(DESC_IG),--desc-ig,)
DESC_TOP_K_FLAG := $(if $(DESC_TOP_K),--desc-top-k $(DESC_TOP_K),)

## Compute biodistribution feature importance (token-level)
.PHONY: feature_importance
feature_importance: requirements
	$(PYTHON_INTERPRETER) -m lnp_ml.interpretability.token_importance \
		$(ORGAN_FLAG) $(METHOD_FLAG) $(DATA_FLAG) $(MODEL_FLAG) $(DEVICE_FLAG) \
		$(DESC_IG_FLAG) $(DESC_TOP_K_FLAG)

## Compute biodistribution feature importance (token + descriptor-level)
# Same entry point as feature_importance but with --desc-ig forced on.
.PHONY: desc_importance
desc_importance: requirements
	$(PYTHON_INTERPRETER) -m lnp_ml.interpretability.token_importance \
		--desc-ig $(ORGAN_FLAG) $(METHOD_FLAG) $(DATA_FLAG) $(MODEL_FLAG) \
		$(DEVICE_FLAG) $(DESC_TOP_K_FLAG)
#################################################################################
# SERVING & DEPLOYMENT                                                          #
#################################################################################

## Formulation optimization: find optimal LNP formulation for target organ
## Usage: make optimize SMILES="CC(C)..." ORGAN=liver
.PHONY: optimize
optimize: requirements
	# Fail fast with a clear message instead of passing an empty --smiles ""
	# or a bare --organ to the CLI when the required variables are unset.
	$(if $(strip $(SMILES)),,$(error SMILES is required. Usage: make optimize SMILES="CC(C)..." ORGAN=liver))
	$(if $(strip $(ORGAN)),,$(error ORGAN is required. Usage: make optimize SMILES="CC(C)..." ORGAN=liver))
	$(PYTHON_INTERPRETER) -m app.optimize --smiles "$(SMILES)" --organ $(ORGAN) $(DEVICE_FLAG)

## Start FastAPI backend server (port 8000)
.PHONY: api
api: requirements
	uvicorn app.api:app --host 0.0.0.0 --port 8000 --reload

## Start Streamlit frontend app (port 8501)
.PHONY: webapp
webapp: requirements
	streamlit run app/app.py --server.port 8501

## Start both API and webapp (run in separate terminals)
.PHONY: serve
serve:
	@echo "请在两个终端分别运行:"
	@echo "  终端 1: make api"
	@echo "  终端 2: make webapp"
	@echo ""
	@echo "然后访问: http://localhost:8501"
#################################################################################
# DOCKER                                                                        #
#################################################################################

## Build Docker images
.PHONY: docker-build
docker-build:
	docker compose build

## Start all services with Docker Compose
.PHONY: docker-up
docker-up:
	docker compose up -d

## Stop all Docker services
.PHONY: docker-down
docker-down:
	docker compose down

## View Docker logs
.PHONY: docker-logs
docker-logs:
	docker compose logs -f

## Build and start all services
# Convenience wrapper: chains docker-build and docker-up, then prints the
# service URLs and the follow-up commands.
.PHONY: docker-serve
docker-serve: docker-build docker-up
	@echo ""
	@echo "🚀 服务已启动!"
	@echo "  - API: http://localhost:8000"
	@echo "  - Web 应用: http://localhost:8501"
	@echo ""
	@echo "查看日志: make docker-logs"
	@echo "停止服务: make docker-down"

## Clean Docker resources (images, volumes, etc.)
# Removes locally-built images and volumes, then prunes dangling resources.
.PHONY: docker-clean
docker-clean:
	docker compose down -v --rmi local
	docker system prune -f
#################################################################################
# Self Documenting Commands                                                     #
#################################################################################

.DEFAULT_GOAL := help

# Python one-liner that scans this Makefile for `## description` comment lines
# preceding a target and prints a formatted rule listing. It is exported so the
# recipe's shell can hand it to `python -c` via the environment.
define PRINT_HELP_PYSCRIPT
import re, sys; \
lines = '\n'.join([line for line in sys.stdin]); \
matches = re.findall(r'\n## (.*)\n[\s\S]+?\n([a-zA-Z_-]+):', lines); \
print('Available rules:\n'); \
print('\n'.join(['{:25}{}'.format(*reversed(match)) for match in matches]))
endef
export PRINT_HELP_PYSCRIPT

## Show this help listing
# Declared .PHONY so a stray file named `help` cannot shadow the target.
.PHONY: help
help:
	@$(PYTHON_INTERPRETER) -c "${PRINT_HELP_PYSCRIPT}" < $(MAKEFILE_LIST)