mirror of
https://github.com/RYDE-WORK/lnp_ml.git
synced 2026-03-21 01:27:00 +08:00
201 lines
6.6 KiB
Makefile
201 lines
6.6 KiB
Makefile
#################################################################################
|
||
# GLOBALS #
|
||
#################################################################################
|
||
|
||
# Project-wide settings (plain literals, so simple assignment is used).
# PYTHON_INTERPRETER is the command the recipes in this file use to start Python.
PROJECT_NAME := lnp-ml
PYTHON_VERSION := 3.8
PYTHON_INTERPRETER := python
|
||
|
||
#################################################################################
|
||
# COMMANDS #
|
||
#################################################################################
|
||
|
||
|
||
## Install Python dependencies
# Runs `pixi install`. The target is phony, so the command runs every time
# the target is requested, including when it is pulled in as a prerequisite.
.PHONY: requirements
requirements:
	pixi install
|
||
|
||
|
||
|
||
|
||
## Delete all compiled Python files
# Step 1 removes every *.pyc / *.pyo file below the current directory.
# Step 2 removes every __pycache__ directory together with whatever is still
# inside it. `find -delete` can only remove empty directories and fails the
# target when a cache directory holds files that do not match *.py[co], so
# the directories are pruned (not descended into) and handed to `rm -rf`.
.PHONY: clean
clean:
	find . -type f -name "*.py[co]" -delete
	find . -type d -name "__pycache__" -prune -exec rm -rf {} +
|
||
|
||
|
||
## Lint using ruff (use `make format` to do formatting)
# Read-only checks: `ruff format --check` followed by `ruff check`.
# Each command is its own recipe line, so make stops at the first failure.
.PHONY: lint
lint:
	ruff format --check
	ruff check
|
||
|
||
## Format source code with ruff
# Runs `ruff check --fix` first, then `ruff format`.
.PHONY: format
format:
	ruff check --fix
	ruff format
|
||
|
||
|
||
|
||
|
||
|
||
## Set up Python interpreter environment
# Prints instructions only; nothing is created here.
# The second message is emitted with printf because whether `echo` expands
# "\n" depends on the shell that make uses; printf always prints the two
# lines separately.
.PHONY: create_environment
create_environment:
	@echo ">>> Pixi environment will be created when running 'make requirements'"
	@printf '%s\n' ">>> Activate with:" "pixi shell"
|
||
|
||
|
||
|
||
|
||
#################################################################################
|
||
# PROJECT RULES #
|
||
#################################################################################
|
||
|
||
|
||
## Clean raw data (raw -> interim)
# Runs scripts/data_cleaning.py with $(PYTHON_INTERPRETER) once the
# `requirements` prerequisite has completed.
.PHONY: clean_data
clean_data: requirements
	$(PYTHON_INTERPRETER) scripts/data_cleaning.py
|
||
|
||
## Process dataset (interim -> processed)
# Runs scripts/process_data.py with $(PYTHON_INTERPRETER) once the
# `requirements` prerequisite has completed.
.PHONY: data
data: requirements
	$(PYTHON_INTERPRETER) scripts/process_data.py
|
||
|
||
## Process dataset for final training (interim -> processed/final, train:val=9:1, no test)
# Runs scripts/process_data_final.py with $(PYTHON_INTERPRETER) once the
# `requirements` prerequisite has completed.
.PHONY: data_final
data_final: requirements
	$(PYTHON_INTERPRETER) scripts/process_data_final.py
|
||
|
||
## Process external data for pretrain (external -> processed)
# Runs scripts/process_external.py with $(PYTHON_INTERPRETER) once the
# `requirements` prerequisite has completed.
.PHONY: data_pretrain
data_pretrain: requirements
	$(PYTHON_INTERPRETER) scripts/process_external.py
|
||
|
||
## Process CV data for cross-validation pretrain (external/all_amine_split_for_LiON -> processed/cv)
# Runs scripts/process_external_cv.py with $(PYTHON_INTERPRETER) once the
# `requirements` prerequisite has completed.
.PHONY: data_pretrain_cv
data_pretrain_cv: requirements
	$(PYTHON_INTERPRETER) scripts/process_external_cv.py
|
||
|
||
## Process internal data with CV splitting (interim -> processed/cv)
## Use SCAFFOLD_SPLIT=1 to enable amine-based scaffold splitting (default: random shuffle)
# SCAFFOLD_SPLIT_FLAG expands to --scaffold-split only when SCAFFOLD_SPLIT
# contains the word 1; for any other value (or none) it expands to nothing.
SCAFFOLD_SPLIT_FLAG = $(if $(filter 1,$(SCAFFOLD_SPLIT)),--scaffold-split)

# Runs scripts/process_data_cv.py, passing the optional flag defined above.
.PHONY: data_cv
data_cv: requirements
	$(PYTHON_INTERPRETER) scripts/process_data_cv.py $(SCAFFOLD_SPLIT_FLAG)
|
||
|
||
# MPNN support: set USE_MPNN=1 to enable the MPNN encoder.
# Example: make pretrain USE_MPNN=1
# MPNN_FLAG expands to --use-mpnn when USE_MPNN holds any value other than an
# explicit "off" word (0, false, no, off), so USE_MPNN=0 does not switch the
# encoder on. An unset or empty USE_MPNN yields no flag.
MPNN_FLAG = $(if $(filter-out 0 false no off,$(USE_MPNN)),--use-mpnn,)
|
||
|
||
# Backbone freezing: set FREEZE_BACKBONE=1 to freeze the backbone and train
# only the heads.
# Example: make finetune FREEZE_BACKBONE=1
# FREEZE_FLAG expands to --freeze-backbone when FREEZE_BACKBONE holds any
# value other than an explicit "off" word (0, false, no, off), so
# FREEZE_BACKBONE=0 does not freeze anything. An unset or empty
# FREEZE_BACKBONE yields no flag.
FREEZE_FLAG = $(if $(filter-out 0 false no off,$(FREEZE_BACKBONE)),--freeze-backbone,)
|
||
|
||
# Device selection: set DEVICE=xxx to choose the device.
# Example: make train DEVICE=cuda:0   or   make test_cv DEVICE=mps
# DEVICE_FLAG expands to "--device <value>" when DEVICE is non-empty and to
# nothing otherwise.
DEVICE_FLAG = $(if $(DEVICE),--device $(DEVICE))
|
||
|
||
## Pretrain on external data (delivery only)
# Invokes the lnp_ml.modeling.pretrain module with the `main` argument,
# followed by the optional $(MPNN_FLAG) and $(DEVICE_FLAG) values.
.PHONY: pretrain
pretrain: requirements
	$(PYTHON_INTERPRETER) -m lnp_ml.modeling.pretrain main $(MPNN_FLAG) $(DEVICE_FLAG)
|
||
|
||
## Evaluate pretrain model (delivery metrics)
# Invokes the lnp_ml.modeling.pretrain module with the `test` argument,
# followed by the optional $(MPNN_FLAG) and $(DEVICE_FLAG) values.
.PHONY: test_pretrain
test_pretrain: requirements
	$(PYTHON_INTERPRETER) -m lnp_ml.modeling.pretrain test $(MPNN_FLAG) $(DEVICE_FLAG)
|
||
|
||
## Pretrain with cross-validation (5-fold)
# Invokes the lnp_ml.modeling.pretrain_cv module with the `main` argument,
# followed by the optional $(MPNN_FLAG) and $(DEVICE_FLAG) values.
.PHONY: pretrain_cv
pretrain_cv: requirements
	$(PYTHON_INTERPRETER) -m lnp_ml.modeling.pretrain_cv main $(MPNN_FLAG) $(DEVICE_FLAG)
|
||
|
||
## Evaluate CV pretrain models on test sets (auto-detects MPNN from checkpoint)
# Invokes the lnp_ml.modeling.pretrain_cv module with the `test` argument.
# Only $(DEVICE_FLAG) is forwarded; no MPNN flag is passed by this recipe.
.PHONY: test_pretrain_cv
test_pretrain_cv: requirements
	$(PYTHON_INTERPRETER) -m lnp_ml.modeling.pretrain_cv test $(DEVICE_FLAG)
|
||
|
||
## Train model (multi-task, from scratch)
# Invokes the lnp_ml.modeling.train module with only the optional
# $(MPNN_FLAG) and $(DEVICE_FLAG) values.
.PHONY: train
train: requirements
	$(PYTHON_INTERPRETER) -m lnp_ml.modeling.train $(MPNN_FLAG) $(DEVICE_FLAG)
|
||
|
||
## Finetune from pretrained checkpoint (use FREEZE_BACKBONE=1 to freeze backbone)
# Invokes the lnp_ml.modeling.train module, initialising from
# models/pretrain_delivery.pt and appending the optional freeze, MPNN and
# device flags. The command is one shell line split with backslashes.
.PHONY: finetune
finetune: requirements
	$(PYTHON_INTERPRETER) -m lnp_ml.modeling.train \
		--init-from-pretrain models/pretrain_delivery.pt \
		$(FREEZE_FLAG) $(MPNN_FLAG) $(DEVICE_FLAG)
|
||
|
||
## Final training using all data (train:val=9:1, no test set), with pretrained weights
# Invokes the lnp_ml.modeling.train module with explicit train/val parquet
# paths under data/processed/final, writes to models/final, initialises from
# models/pretrain_delivery.pt, and appends the optional freeze, MPNN and
# device flags. The command is one shell line split with backslashes.
.PHONY: train_final
train_final: requirements
	$(PYTHON_INTERPRETER) -m lnp_ml.modeling.train \
		--train-path data/processed/final/train.parquet \
		--val-path data/processed/final/val.parquet \
		--output-dir models/final \
		--init-from-pretrain models/pretrain_delivery.pt \
		$(FREEZE_FLAG) $(MPNN_FLAG) $(DEVICE_FLAG)
|
||
|
||
## Finetune with cross-validation on internal data (5-fold, amine-based split) with pretrained weights
# Invokes the lnp_ml.modeling.train_cv module with the `main` argument,
# initialising from models/pretrain_delivery.pt and appending the optional
# freeze, MPNN and device flags. The command is one shell line split with
# backslashes.
.PHONY: finetune_cv
finetune_cv: requirements
	$(PYTHON_INTERPRETER) -m lnp_ml.modeling.train_cv main \
		--init-from-pretrain models/pretrain_delivery.pt \
		$(FREEZE_FLAG) $(MPNN_FLAG) $(DEVICE_FLAG)
|
||
|
||
## Train with cross-validation on internal data only (5-fold, amine-based split)
# Invokes the lnp_ml.modeling.train_cv module with the `main` argument and
# the optional freeze, MPNN and device flags; no pretrained checkpoint is
# passed by this recipe.
.PHONY: train_cv
train_cv: requirements
	$(PYTHON_INTERPRETER) -m lnp_ml.modeling.train_cv main $(FREEZE_FLAG) $(MPNN_FLAG) $(DEVICE_FLAG)
|
||
|
||
|
||
## Evaluate CV finetuned models on test sets (auto-detects MPNN from checkpoint)
# Invokes the lnp_ml.modeling.train_cv module with the `test` argument.
# Only $(DEVICE_FLAG) is forwarded; no MPNN flag is passed by this recipe.
.PHONY: test_cv
test_cv: requirements
	$(PYTHON_INTERPRETER) -m lnp_ml.modeling.train_cv test $(DEVICE_FLAG)
|
||
|
||
## Train with hyperparameter tuning
# Invokes the lnp_ml.modeling.train module with --tune, followed by the
# optional $(MPNN_FLAG) and $(DEVICE_FLAG) values.
.PHONY: tune
tune: requirements
	$(PYTHON_INTERPRETER) -m lnp_ml.modeling.train --tune $(MPNN_FLAG) $(DEVICE_FLAG)
|
||
|
||
## Run predictions
# Invokes the lnp_ml.modeling.predict module with no sub-command; only the
# optional $(DEVICE_FLAG) is forwarded.
.PHONY: predict
predict: requirements
	$(PYTHON_INTERPRETER) -m lnp_ml.modeling.predict $(DEVICE_FLAG)
|
||
|
||
## Test model on test set (with detailed metrics, auto-detects MPNN from checkpoint)
# Invokes the lnp_ml.modeling.predict module with the `test` argument; only
# the optional $(DEVICE_FLAG) is forwarded.
.PHONY: test
test: requirements
	$(PYTHON_INTERPRETER) -m lnp_ml.modeling.predict test $(DEVICE_FLAG)
|
||
|
||
## Formulation optimization: find optimal LNP formulation for target organ
## Usage: make optimize SMILES="CC(C)..." ORGAN=liver
# Invokes the app.optimize module with --smiles and --organ taken from the
# SMILES and ORGAN make variables, plus the optional $(DEVICE_FLAG).
# Both variables are required: the two guard lines abort with a clear make
# error when either is empty, instead of running a command whose --organ
# option has no value. The guards expand to nothing when the variables are
# set. They are evaluated when this recipe is expanded, i.e. after the
# `requirements` prerequisite has run.
.PHONY: optimize
optimize: requirements
	$(if $(strip $(SMILES)),,$(error SMILES is not set. Usage: make optimize SMILES=<smiles> ORGAN=<organ>))
	$(if $(strip $(ORGAN)),,$(error ORGAN is not set. Usage: make optimize SMILES=<smiles> ORGAN=<organ>))
	$(PYTHON_INTERPRETER) -m app.optimize --smiles "$(SMILES)" --organ "$(ORGAN)" $(DEVICE_FLAG)
|
||
|
||
|
||
#################################################################################
|
||
# Self Documenting Commands #
|
||
#################################################################################
|
||
|
||
# Running `make` with no goal runs the help target.
.DEFAULT_GOAL := help

# Python one-liner used by the help target. It reads makefile text from
# stdin and joins the lines with an extra newline (each line keeps its own
# line ending, so every source line ends up followed by a blank one). It then
# pairs each line beginning with "## " with the next following line that
# starts with a rule name made of letters, "_" or "-" and a colon, and prints
# "name  description" rows under an "Available rules:" heading.
# Comments must stay outside the define: inside it they would become part of
# the variable's value.
define PRINT_HELP_PYSCRIPT
import re, sys; \
text = '\n'.join(sys.stdin); \
rules = re.findall(r'\n## (.*)\n[\s\S]+?\n([a-zA-Z_-]+):', text); \
print('Available rules:\n'); \
print('\n'.join('{:25}{}'.format(name, desc) for desc, name in rules))
endef
export PRINT_HELP_PYSCRIPT
|
||
|
||
# Prints the list of documented rules by feeding the makefile named in
# $(MAKEFILE_LIST) to the PRINT_HELP_PYSCRIPT one-liner on stdin.
# Declared phony like every other command target in this file, so a file or
# directory called "help" in the working tree cannot make the target look
# up to date and silently disable it.
.PHONY: help
help:
	@$(PYTHON_INTERPRETER) -c "${PRINT_HELP_PYSCRIPT}" < $(MAKEFILE_LIST)
|