mirror of
https://github.com/RYDE-WORK/lnp_ml.git
synced 2026-03-21 09:36:32 +08:00
Compare commits
22 Commits
6e4f85c5a8
...
5730490642
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5730490642 | ||
|
|
0b90caef1d | ||
|
|
a9392aa780 | ||
|
|
3f33f9d233 | ||
|
|
c225fc67a7 | ||
|
|
3cce4c9373 | ||
|
|
68119df128 | ||
|
|
75e1dcb0eb | ||
|
|
982e98cced | ||
|
|
39a14e4274 | ||
|
|
93a6f8654d | ||
|
|
a56637c8ac | ||
|
|
871afc5988 | ||
|
|
ac4246c2b7 | ||
|
|
47bbb64c66 | ||
|
|
039be54c5a | ||
|
|
e6a5e5495a | ||
|
|
e123fc8f3e | ||
|
|
c392b48994 | ||
|
|
e1c85c83ba | ||
|
|
a2bfb26dfc | ||
|
|
6773929ea2 |
75
.dockerignore
Normal file
75
.dockerignore
Normal file
@ -0,0 +1,75 @@
|
||||
# Build-context exclusions for the LNP-ML Docker images.
#
# Unlike .gitignore, a .dockerignore pattern is matched against the path
# relative to the context root, so a bare `__pycache__/` or `*.pyc` only hits
# the top level. Patterns that are meant to apply at any depth therefore carry
# a `**/` prefix (it also matches zero directories, so the root is still covered).

# Python
**/__pycache__/
**/*.py[cod]
**/*$py.class
*.so
.Python
**/*.egg-info/
.eggs/
dist/
build/
*.egg

# Virtual environments
.venv/
venv/
ENV/
env/
.pixi/

# IDE
.idea/
.vscode/
**/*.swp
**/*.swo
.cursor/

# Git
.git/
.gitignore

# Data (not needed inside the image)
data/
!data/.gitkeep

# Notebooks
notebooks/
**/*.ipynb

# Documentation
docs/

# Reports
reports/

# References
references/

# Scripts (training scripts are not needed in the image)
scripts/

# Lock files
pixi.lock

# Tests
tests/
**/.pytest_cache/

# Logs
**/*.log
logs/

# Temporary files
**/*.tmp
**/*.temp
**/.DS_Store

# Models (will be mounted as volume or copied explicitly)
# Note: models/final/ is copied in Dockerfile
models/finetune_cv/
models/pretrain_cv/
models/mpnn/
models/*.pt
models/*.json
!models/final/
|
||||
|
||||
1
.gitattributes
vendored
Normal file
1
.gitattributes
vendored
Normal file
@ -0,0 +1 @@
|
||||
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||
63
Dockerfile
Normal file
63
Dockerfile
Normal file
@ -0,0 +1,63 @@
|
||||
# LNP-ML Docker image
# Multi-stage build: one shared base stage plus one target per service
# (`api` for FastAPI, `streamlit` for the web UI).

FROM python:3.8-slim AS base

# Python / pip runtime settings
ENV PYTHONUNBUFFERED=1
ENV PYTHONDONTWRITEBYTECODE=1
ENV PIP_NO_CACHE_DIR=1
ENV PIP_DISABLE_PIP_VERSION_CHECK=1

WORKDIR /app

# System packages; the apt lists are removed in the same layer to keep it small
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        libxext6 \
        libxrender1 \
    && rm -rf /var/lib/apt/lists/*

# Dependency manifest is copied on its own so the pip layer is reused
# whenever only project code changes
COPY requirements.txt .

# Python dependencies
RUN pip install --upgrade pip && \
    pip install -r requirements.txt

# Project metadata and source code
COPY pyproject.toml README.md LICENSE ./
COPY lnp_ml/ ./lnp_ml/
COPY app/ ./app/

# Install the project package itself
RUN pip install -e .

# Model files
COPY models/final/ ./models/final/

# ============ API service ============
FROM base AS api

EXPOSE 8000

ENV MODEL_PATH=/app/models/final/model.pt

CMD ["uvicorn", "app.api:app", "--host", "0.0.0.0", "--port", "8000"]

# ============ Streamlit service ============
FROM base AS streamlit

EXPOSE 8501

# Streamlit configuration
ENV STREAMLIT_SERVER_PORT=8501
ENV STREAMLIT_SERVER_ADDRESS=0.0.0.0
ENV STREAMLIT_SERVER_HEADLESS=true
ENV STREAMLIT_BROWSER_GATHER_USAGE_STATS=false

CMD ["streamlit", "run", "app/app.py"]
|
||||
|
||||
179
Makefile
179
Makefile
@ -68,11 +68,29 @@ clean_data: requirements
|
||||
data: requirements
|
||||
$(PYTHON_INTERPRETER) scripts/process_data.py
|
||||
|
||||
# Data-preparation targets: each one runs a single script from scripts/ with
# the project interpreter after the `requirements` prerequisite.

## Process dataset for final training (interim -> processed/final, train:val=9:1, no test)
.PHONY: data_final
data_final: requirements
	$(PYTHON_INTERPRETER) scripts/process_data_final.py

## Process external data for pretrain (external -> processed)
.PHONY: data_pretrain
data_pretrain: requirements
	$(PYTHON_INTERPRETER) scripts/process_external.py

## Process CV data for cross-validation pretrain (external/all_amine_split_for_LiON -> processed/cv)
.PHONY: data_pretrain_cv
data_pretrain_cv: requirements
	$(PYTHON_INTERPRETER) scripts/process_external_cv.py
|
||||
|
||||
## Process internal data with CV splitting (interim -> processed/cv)
## Use SCAFFOLD_SPLIT=1 to enable amine-based scaffold splitting (default: random shuffle)
# Expands to --scaffold-split only when SCAFFOLD_SPLIT contains the word 1;
# any other value (or none) leaves the flag off.
SCAFFOLD_SPLIT_FLAG = $(if $(filter 1,$(SCAFFOLD_SPLIT)),--scaffold-split)

.PHONY: data_cv
data_cv: requirements
	$(PYTHON_INTERPRETER) scripts/process_data_cv.py $(SCAFFOLD_SPLIT_FLAG)
|
||||
|
||||
# MPNN support: pass USE_MPNN=1 to enable the MPNN encoder.
# Example: make pretrain USE_MPNN=1
# Explicit "off" spellings (0, false, no) are filtered out before the test, so
# USE_MPNN=0 does not switch the encoder on merely because the value is non-empty.
MPNN_FLAG = $(if $(filter-out 0 false no,$(USE_MPNN)),--use-mpnn,)
|
||||
@ -81,40 +99,185 @@ MPNN_FLAG = $(if $(USE_MPNN),--use-mpnn,)
|
||||
# Example: make finetune FREEZE_BACKBONE=1
# Explicit "off" spellings (0, false, no) are filtered out before the test, so
# FREEZE_BACKBONE=0 does not freeze the backbone merely because the value is non-empty.
FREEZE_FLAG = $(if $(filter-out 0 false no,$(FREEZE_BACKBONE)),--freeze-backbone,)
|
||||
|
||||
# Device selection: pass DEVICE=xxx to choose the compute device.
# Example: make train DEVICE=cuda:0   or   make test_cv DEVICE=mps
# Empty when DEVICE is unset, otherwise "--device <value>".
DEVICE_FLAG = $(and $(DEVICE),--device $(DEVICE))
|
||||
|
||||
## Pretrain on external data (delivery only)
.PHONY: pretrain
# Single invocation: the stale pre-DEVICE_FLAG command line is dropped so the
# pretraining run is not executed twice.
pretrain: requirements
	$(PYTHON_INTERPRETER) -m lnp_ml.modeling.pretrain main $(MPNN_FLAG) $(DEVICE_FLAG)
|
||||
|
||||
## Evaluate pretrain model (delivery metrics)
.PHONY: test_pretrain
# Single invocation: the stale pre-DEVICE_FLAG command line is dropped so the
# evaluation is not executed twice.
test_pretrain: requirements
	$(PYTHON_INTERPRETER) -m lnp_ml.modeling.pretrain test $(MPNN_FLAG) $(DEVICE_FLAG)
|
||||
|
||||
# Cross-validated pretraining: `main` trains, `test` evaluates. The test
# command takes no MPNN flag.

## Pretrain with cross-validation (5-fold)
.PHONY: pretrain_cv
pretrain_cv: requirements
	$(PYTHON_INTERPRETER) -m lnp_ml.modeling.pretrain_cv main $(MPNN_FLAG) $(DEVICE_FLAG)

## Evaluate CV pretrain models on test sets (auto-detects MPNN from checkpoint)
.PHONY: test_pretrain_cv
test_pretrain_cv: requirements
	$(PYTHON_INTERPRETER) -m lnp_ml.modeling.pretrain_cv test $(DEVICE_FLAG)
|
||||
|
||||
## Train model (multi-task, from scratch)
.PHONY: train
# Single invocation: the stale pre-DEVICE_FLAG command line is dropped so
# training is not executed twice.
train: requirements
	$(PYTHON_INTERPRETER) -m lnp_ml.modeling.train $(MPNN_FLAG) $(DEVICE_FLAG)
|
||||
|
||||
## Finetune from pretrained checkpoint (use FREEZE_BACKBONE=1 to freeze backbone)
.PHONY: finetune
# Single invocation: the stale pre-DEVICE_FLAG command line is dropped so
# finetuning is not executed twice.
finetune: requirements
	$(PYTHON_INTERPRETER) -m lnp_ml.modeling.train --init-from-pretrain models/pretrain_delivery.pt $(FREEZE_FLAG) $(MPNN_FLAG) $(DEVICE_FLAG)
|
||||
|
||||
## Final training using all data (train:val=9:1, no test set), with pretrained weights
.PHONY: train_final
# Reads the train/val parquet files under data/processed/final, starts from
# the delivery pretrain checkpoint and writes to models/final.
train_final: requirements
	$(PYTHON_INTERPRETER) -m lnp_ml.modeling.train \
		--train-path data/processed/final/train.parquet \
		--val-path data/processed/final/val.parquet \
		--output-dir models/final \
		--init-from-pretrain models/pretrain_delivery.pt \
		$(FREEZE_FLAG) $(MPNN_FLAG) $(DEVICE_FLAG)
|
||||
|
||||
# Cross-validated training on internal data. finetune_cv and train_cv differ
# only in whether the pretrain checkpoint is passed; test_cv evaluates.

## Finetune with cross-validation on internal data (5-fold, amine-based split) with pretrained weights
.PHONY: finetune_cv
finetune_cv: requirements
	$(PYTHON_INTERPRETER) -m lnp_ml.modeling.train_cv main --init-from-pretrain models/pretrain_delivery.pt $(FREEZE_FLAG) $(MPNN_FLAG) $(DEVICE_FLAG)

## Train with cross-validation on internal data only (5-fold, amine-based split)
.PHONY: train_cv
train_cv: requirements
	$(PYTHON_INTERPRETER) -m lnp_ml.modeling.train_cv main $(FREEZE_FLAG) $(MPNN_FLAG) $(DEVICE_FLAG)

## Evaluate CV finetuned models on test sets (auto-detects MPNN from checkpoint)
.PHONY: test_cv
test_cv: requirements
	$(PYTHON_INTERPRETER) -m lnp_ml.modeling.train_cv test $(DEVICE_FLAG)
|
||||
|
||||
## Train with hyperparameter tuning
.PHONY: tune
# Single invocation: the stale pre-DEVICE_FLAG command line is dropped so the
# tuning run is not executed twice.
tune: requirements
	$(PYTHON_INTERPRETER) -m lnp_ml.modeling.train --tune $(MPNN_FLAG) $(DEVICE_FLAG)
|
||||
|
||||
# ============ Nested CV + Optuna tuning (StratifiedKFold + class weights) ============
# Common parameters:
#   SEED: random seed (default: 42)
#   N_TRIALS: number of Optuna trials (default: 20)
#   EPOCHS_PER_TRIAL: maximum epochs per trial (default: 30)
#   MIN_STRATUM_COUNT: minimum sample count for a composite stratification label (default: 5)
#   OUTPUT_DIR: output directory (default differs per command)
#   INIT_PRETRAIN: path to the pretrained weights (default: models/pretrain_delivery.pt)

# Each *_FLAG is empty when its variable is unset, so the command-line option
# is simply omitted from the python invocation.
SEED_FLAG             = $(if $(SEED),--seed $(SEED))
N_TRIALS_FLAG         = $(if $(N_TRIALS),--n-trials $(N_TRIALS))
EPOCHS_PER_TRIAL_FLAG = $(if $(EPOCHS_PER_TRIAL),--epochs-per-trial $(EPOCHS_PER_TRIAL))
MIN_STRATUM_FLAG      = $(if $(MIN_STRATUM_COUNT),--min-stratum-count $(MIN_STRATUM_COUNT))
OUTPUT_DIR_FLAG       = $(if $(OUTPUT_DIR),--output-dir $(OUTPUT_DIR))
USE_SWA_FLAG          = $(if $(USE_SWA),--use-swa)
# Pretrained weights are used by default; set NO_PRETRAIN=1 to disable them.
INIT_PRETRAIN_FLAG = $(if $(NO_PRETRAIN),,--init-from-pretrain $(or $(INIT_PRETRAIN),models/pretrain_delivery.pt))
|
||||
|
||||
## Nested CV with Optuna: outer 5-fold (test) + inner 3-fold (tune)
## For model evaluation: the outer 5-fold gives an unbiased performance estimate, the inner 3-fold searches hyperparameters
## Loads models/pretrain_delivery.pt by default; use NO_PRETRAIN=1 to disable
## Example: make nested_cv_tune DEVICE=cuda N_TRIALS=30
.PHONY: nested_cv_tune
nested_cv_tune: requirements
	$(PYTHON_INTERPRETER) -m lnp_ml.modeling.nested_cv_optuna \
		$(DEVICE_FLAG) $(MPNN_FLAG) $(SEED_FLAG) $(INIT_PRETRAIN_FLAG) \
		$(N_TRIALS_FLAG) $(EPOCHS_PER_TRIAL_FLAG) $(MIN_STRATUM_FLAG) $(OUTPUT_DIR_FLAG)

## Final training with Optuna: 3-fold CV tune + full data train
## For the final model: tune with 3-fold CV, then train on all data (no early stopping)
## Loads models/pretrain_delivery.pt by default; use NO_PRETRAIN=1 to disable
## Example: make final_optuna DEVICE=cuda N_TRIALS=30 USE_SWA=1
.PHONY: final_optuna
final_optuna: requirements
	$(PYTHON_INTERPRETER) -m lnp_ml.modeling.final_train_optuna_cv \
		$(DEVICE_FLAG) $(MPNN_FLAG) $(SEED_FLAG) $(INIT_PRETRAIN_FLAG) \
		$(N_TRIALS_FLAG) $(EPOCHS_PER_TRIAL_FLAG) $(MIN_STRATUM_FLAG) $(OUTPUT_DIR_FLAG) $(USE_SWA_FLAG)
|
||||
|
||||
## Run predictions
.PHONY: predict
# Single invocation: the stale pre-DEVICE_FLAG command line is dropped so
# prediction is not executed twice.
predict: requirements
	$(PYTHON_INTERPRETER) -m lnp_ml.modeling.predict $(DEVICE_FLAG)
|
||||
|
||||
## Test model on test set (with detailed metrics, auto-detects MPNN from checkpoint)
.PHONY: test
# Single description line and single invocation: the superseded comment and
# the stale pre-DEVICE_FLAG command line are dropped.
test: requirements
	$(PYTHON_INTERPRETER) -m lnp_ml.modeling.predict test $(DEVICE_FLAG)
|
||||
|
||||
## Formulation optimization: find optimal LNP formulation for target organ
## Usage: make optimize SMILES="CC(C)..." ORGAN=liver
.PHONY: optimize
# SMILES and ORGAN are mandatory. The guards stop with a readable message
# instead of launching python with an empty or missing argument. Both values
# are quoted so the shell passes each one as a single word.
optimize: requirements
	$(if $(strip $(SMILES)),,$(error SMILES is required: make optimize SMILES=... ORGAN=liver))
	$(if $(strip $(ORGAN)),,$(error ORGAN is required: make optimize SMILES=... ORGAN=liver))
	$(PYTHON_INTERPRETER) -m app.optimize --smiles "$(SMILES)" --organ "$(ORGAN)" $(DEVICE_FLAG)
|
||||
|
||||
# Local development servers. Both run in the foreground; the `serve` target
# prints how to start them side by side.

## Start FastAPI backend server (port 8000)
.PHONY: api
api: requirements
	uvicorn app.api:app --host 0.0.0.0 --port 8000 --reload

## Start Streamlit frontend app (port 8501)
.PHONY: webapp
webapp: requirements
	streamlit run app/app.py --server.port 8501
|
||||
|
||||
## Start both API and webapp (run in separate terminals)
.PHONY: serve
# Starts nothing itself: it only prints the two commands to run and the URL
# to open afterwards.
serve:
	@echo "请在两个终端分别运行:"
	@echo " 终端 1: make api"
	@echo " 终端 2: make webapp"
	@echo ""
	@echo "然后访问: http://localhost:8501"
|
||||
|
||||
|
||||
#################################################################################
# DOCKER COMMANDS                                                               #
#################################################################################
# Thin wrappers around `docker compose`; none of them depends on `requirements`.

## Build Docker images
.PHONY: docker-build
docker-build:
	docker compose build

## Start all services with Docker Compose
.PHONY: docker-up
docker-up:
	docker compose up -d

## Stop all Docker services
.PHONY: docker-down
docker-down:
	docker compose down

## View Docker logs
.PHONY: docker-logs
docker-logs:
	docker compose logs -f

## Build and start all services
.PHONY: docker-serve
# Builds, starts the stack, then prints the service URLs and follow-up commands.
docker-serve: docker-build docker-up
	@echo ""
	@echo "🚀 服务已启动!"
	@echo " - API: http://localhost:8000"
	@echo " - Web 应用: http://localhost:8501"
	@echo ""
	@echo "查看日志: make docker-logs"
	@echo "停止服务: make docker-down"

## Clean Docker resources (images, volumes, etc.)
.PHONY: docker-clean
# NOTE(review): `docker system prune -f` is not scoped to this project; it
# removes unused Docker resources host-wide without prompting.
docker-clean:
	docker compose down -v --rmi local
	docker system prune -f
|
||||
|
||||
|
||||
#################################################################################
|
||||
|
||||
@ -156,5 +156,12 @@ python -m lnp_ml.modeling.train \
|
||||
└── plots.py <- Code to create visualizations
|
||||
```
|
||||
|
||||
|
||||
### 配方筛选
|
||||
|
||||
```
|
||||
make optimize SMILES="CC(C)NCCNC(C)C" ORGAN=liver
|
||||
```
|
||||
|
||||
--------
|
||||
|
||||
|
||||
62
app/PARAM.md
Normal file
62
app/PARAM.md
Normal file
@ -0,0 +1,62 @@
|
||||
## Possible Values
|
||||
|
||||
# comp token ([B, 5]; the last four ratios always sum to 1)
|
||||
Cationic_Lipid_to_mRNA_weight_ratio(float, Min: 0.05, Max: 0.3, Step Size: 0.01)
|
||||
Cationic_Lipid_Mol_Ratio(float, Min: 0.05, Max: 0.8, Step Size: 0.01)
|
||||
Phospholipid_Mol_Ratio(float, Min: 0, Max: 0.8, Step Size: 0.01)
|
||||
Cholesterol_Mol_Ratio(float, Min: 0, Max: 0.8, Step Size: 0.01)
|
||||
PEG_Lipid_Mol_Ratio(float, Min: 0, Max: 0.05, Step Size: 0.01)
|
||||
|
||||
# phys token([B, 12])
|
||||
Purity_Pure(one-hot for Purity, always Pure)
|
||||
Purity_Crude(one-hot for Purity, always Pure)
|
||||
Mix_type_Microfluidic(one-hot for Mix_type, always Microfluidic)
|
||||
Mix_type_Pipetting(one-hot for Mix_type, always Microfluidic)
|
||||
Cargo_type_mRNA(one-hot for Cargo_type, always mRNA)
|
||||
Cargo_type_pDNA(one-hot for Cargo_type, always mRNA)
|
||||
Cargo_type_siRNA(one-hot for Cargo_type, always mRNA)
|
||||
Target_or_delivered_gene_FFL(one-hot for Target_or_delivered_gene, always FFL)
|
||||
Target_or_delivered_gene_Peptide_barcode(one-hot for Target_or_delivered_gene, always FFL)
|
||||
Target_or_delivered_gene_hEPO(one-hot for Target_or_delivered_gene, always FFL)
|
||||
Target_or_delivered_gene_FVII(one-hot for Target_or_delivered_gene, always FFL)
|
||||
Target_or_delivered_gene_GFP(one-hot for Target_or_delivered_gene, always FFL)
|
||||
|
||||
# help token([B, 4])
|
||||
Helper_lipid_ID_DOPE(one-hot for Helper_lipid_ID, one of {DOPE, DSPC, DOTAP})
|
||||
Helper_lipid_ID_DOTAP(one-hot for Helper_lipid_ID, one of {DOPE, DSPC, DOTAP})
|
||||
Helper_lipid_ID_DSPC(one-hot for Helper_lipid_ID, one of {DOPE, DSPC, DOTAP})
|
||||
Helper_lipid_ID_MDOA(one-hot for Helper_lipid_ID, one of {DOPE, DSPC, DOTAP})
|
||||
|
||||
# exp token([B, 32])
|
||||
Model_type_A549(one-hot for Model_type, always Mouse)
|
||||
Model_type_BDMC(one-hot for Model_type, always Mouse)
|
||||
Model_type_BMDM(one-hot for Model_type, always Mouse)
|
||||
Model_type_HBEC_ALI(one-hot for Model_type, always Mouse)
|
||||
Model_type_HEK293T(one-hot for Model_type, always Mouse)
|
||||
Model_type_HeLa(one-hot for Model_type, always Mouse)
|
||||
Model_type_IGROV1(one-hot for Model_type, always Mouse)
|
||||
Model_type_Mouse(one-hot for Model_type, always Mouse)
|
||||
Model_type_RAW264p7(one-hot for Model_type, always Mouse)
|
||||
Delivery_target_dendritic_cell(one-hot for Delivery_target, always body)
|
||||
Delivery_target_generic_cell(one-hot for Delivery_target, always body)
|
||||
Delivery_target_liver(one-hot for Delivery_target, always body)
|
||||
Delivery_target_lung(one-hot for Delivery_target, always body)
|
||||
Delivery_target_lung_epithelium(one-hot for Delivery_target, always body)
|
||||
Delivery_target_macrophage(one-hot for Delivery_target, always body)
|
||||
Delivery_target_muscle(one-hot for Delivery_target, always body)
|
||||
Delivery_target_spleen(one-hot for Delivery_target, always body)
|
||||
Delivery_target_body(one-hot for Delivery_target, always body)
|
||||
Route_of_administration_in_vitro(one-hot for Route_of_administration, one of {Intravenous, Intramuscular})
|
||||
Route_of_administration_intravenous(one-hot for Route_of_administration, one of {Intravenous, Intramuscular})
|
||||
Route_of_administration_intramuscular(one-hot for Route_of_administration, one of {Intravenous, Intramuscular})
|
||||
Route_of_administration_intratracheal(one-hot for Route_of_administration, one of {Intravenous, Intramuscular})
|
||||
Sample_organization_type_individual(one-hot for Sample_organization_type, always Individual)
|
||||
Sample_organization_type_barcoded(one-hot for Sample_organization_type, always Individual)
|
||||
Value_name_log_luminescence(one-hot for Value_name, always luminescence)
|
||||
Value_name_luminescence(one-hot for Value_name, always luminescence)
|
||||
Value_name_FFL_silencing(one-hot for Value_name, always luminescence)
|
||||
Value_name_Peptide_abundance(one-hot for Value_name, always luminescence)
|
||||
Value_name_hEPO(one-hot for Value_name, always luminescence)
|
||||
Value_name_FVII_silencing(one-hot for Value_name, always luminescence)
|
||||
Value_name_GFP_delivery(one-hot for Value_name, always luminescence)
|
||||
Value_name_Discretized_luminescence(one-hot for Value_name, always luminescence)
|
||||
15
app/SCORE.md
Normal file
15
app/SCORE.md
Normal file
@ -0,0 +1,15 @@
|
||||
## regression
|
||||
biodistribution(selected organ only): score = y * weight, where weight=0.3
|
||||
quantified_delivery: score = (y-min)/(max-min)*weight, where weight=0.25, (min=-0.798559291, max=4.497814051056962) when route_of_administration=intravenous, (min=-0.794912427, max=10.220042980012716) when route_of_administration=intramuscular
|
||||
size: score = 0 * weight if y<60, 1 * weight if 60<=y<=150, 0 * weight if y>150, where weight=0.05
|
||||
|
||||
## classification
|
||||
encapsulation_efficiency_0: score = weight, where weight=0
|
||||
encapsulation_efficiency_1: score = weight, where weight=0.02
|
||||
encapsulation_efficiency_2: score = weight, where weight=0.08
|
||||
pdi_0: score = weight, where weight=0.08
|
||||
pdi_1: score = weight, where weight=0.02
|
||||
pdi_2: score = weight, where weight=0
|
||||
pdi_3: score = weight, where weight=0
|
||||
toxicity_0: score=weight, where weight=0.2
|
||||
toxicity_1: score=weight, where weight=0
|
||||
2
app/__init__.py
Normal file
2
app/__init__.py
Normal file
@ -0,0 +1,2 @@
|
||||
"""LNP 配方优化应用"""
|
||||
|
||||
361
app/api.py
Normal file
361
app/api.py
Normal file
@ -0,0 +1,361 @@
|
||||
"""
|
||||
FastAPI 配方优化 API
|
||||
|
||||
启动服务:
|
||||
uvicorn app.api:app --host 0.0.0.0 --port 8000 --reload
|
||||
"""
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import List, Dict, Optional
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
import torch
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from pydantic import BaseModel, Field
|
||||
from loguru import logger
|
||||
|
||||
from lnp_ml.config import MODELS_DIR
|
||||
from lnp_ml.modeling.predict import load_model
|
||||
from app.optimize import (
|
||||
optimize,
|
||||
format_results,
|
||||
AVAILABLE_ORGANS,
|
||||
TARGET_BIODIST,
|
||||
CompRanges,
|
||||
ScoringWeights,
|
||||
)
|
||||
|
||||
|
||||
# ============ Pydantic Models ============
|
||||
|
||||
class CompRangesRequest(BaseModel):
    """组分范围配置"""

    # The class docstring and the Field descriptions are kept verbatim because
    # pydantic publishes them in the generated schema.
    # Every component is described by a (min, max) pair; Field's ge/le bound
    # each value individually.
    weight_ratio_min: float = Field(default=0.05, ge=0.01, le=0.50, description="阳离子脂质/mRNA 重量比最小值")
    weight_ratio_max: float = Field(default=0.30, ge=0.01, le=0.50, description="阳离子脂质/mRNA 重量比最大值")
    cationic_mol_min: float = Field(default=0.05, ge=0.00, le=1.00, description="阳离子脂质 mol 比例最小值")
    cationic_mol_max: float = Field(default=0.80, ge=0.00, le=1.00, description="阳离子脂质 mol 比例最大值")
    phospholipid_mol_min: float = Field(default=0.00, ge=0.00, le=1.00, description="磷脂 mol 比例最小值")
    phospholipid_mol_max: float = Field(default=0.80, ge=0.00, le=1.00, description="磷脂 mol 比例最大值")
    cholesterol_mol_min: float = Field(default=0.00, ge=0.00, le=1.00, description="胆固醇 mol 比例最小值")
    cholesterol_mol_max: float = Field(default=0.80, ge=0.00, le=1.00, description="胆固醇 mol 比例最大值")
    peg_mol_min: float = Field(default=0.00, ge=0.00, le=0.20, description="PEG 脂质 mol 比例最小值")
    peg_mol_max: float = Field(default=0.05, ge=0.00, le=0.20, description="PEG 脂质 mol 比例最大值")

    def to_comp_ranges(self) -> CompRanges:
        """Build the optimizer's ``CompRanges`` from this request model.

        Returns:
            A ``CompRanges`` whose keyword arguments mirror this model's
            fields one-to-one.
        """
        field_names = (
            "weight_ratio_min",
            "weight_ratio_max",
            "cationic_mol_min",
            "cationic_mol_max",
            "phospholipid_mol_min",
            "phospholipid_mol_max",
            "cholesterol_mol_min",
            "cholesterol_mol_max",
            "peg_mol_min",
            "peg_mol_max",
        )
        bounds = {name: getattr(self, name) for name in field_names}
        return CompRanges(**bounds)
|
||||
|
||||
|
||||
class ScoringWeightsRequest(BaseModel):
    """评分权重配置"""

    # The class docstring and the Field descriptions are kept verbatim because
    # pydantic publishes them in the generated schema.
    # Scalar weights are constrained to be non-negative; the per-class weight
    # lists carry no length or sign constraint at this level.
    biodist_weight: float = Field(default=1.0, ge=0.0, description="目标器官分布权重")
    delivery_weight: float = Field(default=0.0, ge=0.0, description="量化递送权重")
    size_weight: float = Field(default=0.0, ge=0.0, description="粒径权重 (80-150nm)")
    ee_class_weights: List[float] = Field(default=[0.0, 0.0, 0.0], description="EE 分类权重 [class0, class1, class2]")
    pdi_class_weights: List[float] = Field(default=[0.0, 0.0, 0.0, 0.0], description="PDI 分类权重 [class0, class1, class2, class3]")
    toxic_class_weights: List[float] = Field(default=[0.0, 0.0], description="毒性分类权重 [无毒, 有毒]")

    def to_scoring_weights(self) -> ScoringWeights:
        """Build the optimizer's ``ScoringWeights`` from this request model.

        Returns:
            A ``ScoringWeights`` whose keyword arguments mirror this model's
            fields one-to-one.
        """
        field_names = (
            "biodist_weight",
            "delivery_weight",
            "size_weight",
            "ee_class_weights",
            "pdi_class_weights",
            "toxic_class_weights",
        )
        weights = {name: getattr(self, name) for name in field_names}
        return ScoringWeights(**weights)
|
||||
|
||||
|
||||
class OptimizeRequest(BaseModel):
    """优化请求"""

    # The class docstring and the Field descriptions are kept verbatim because
    # pydantic publishes them in the generated schema.

    # Required inputs
    smiles: str = Field(..., description="Cationic lipid SMILES string")
    organ: str = Field(..., description="Target organ for optimization")

    # Search controls; None means "let the optimizer use its own default"
    top_k: int = Field(default=20, ge=1, le=100, description="Number of top formulations to return")
    num_seeds: Optional[int] = Field(default=None, ge=1, le=500, description="Number of seed points from first iteration (default: top_k * 5)")
    top_per_seed: int = Field(default=1, ge=1, le=10, description="Number of local best to keep per seed in refinement")
    step_sizes: Optional[List[float]] = Field(default=None, description="Step sizes for each iteration (default: [0.10, 0.02, 0.01])")

    # Optional overrides for the search space and the ranking
    comp_ranges: Optional[CompRangesRequest] = Field(default=None, description="组分范围配置(默认使用标准范围)")
    routes: Optional[List[str]] = Field(default=None, description="给药途径列表 (default: ['intravenous', 'intramuscular'])")
    scoring_weights: Optional[ScoringWeightsRequest] = Field(default=None, description="评分权重配置(默认仅按 biodist 排序)")

    class Config:
        # Example payload attached to the generated JSON schema
        json_schema_extra = {
            "example": {
                "smiles": "CC(C)NCCNC(C)C",
                "organ": "liver",
                "top_k": 20,
                "num_seeds": None,
                "top_per_seed": 1,
                "step_sizes": None,
                "comp_ranges": None,
                "routes": None,
                "scoring_weights": None,
            }
        }
|
||||
|
||||
|
||||
class FormulationResult(BaseModel):
    """单个配方结果"""

    # Class docstring kept verbatim: pydantic publishes it in the generated schema.

    # Ranking
    rank: int
    target_biodist: float
    composite_score: Optional[float] = None  # composite score

    # Formulation composition
    cationic_lipid_to_mrna_ratio: float
    cationic_lipid_mol_ratio: float
    phospholipid_mol_ratio: float
    cholesterol_mol_ratio: float
    peg_lipid_mol_ratio: float
    helper_lipid: str
    route: str

    # Predicted biodistribution per organ
    all_biodist: Dict[str, float]

    # Additional predictions
    quantified_delivery: Optional[float] = None
    size: Optional[float] = None
    pdi_class: Optional[int] = None  # PDI class (0: <0.2, 1: 0.2-0.3, 2: 0.3-0.4, 3: >0.4)
    ee_class: Optional[int] = None  # EE class (0: <80%, 1: 80-90%, 2: >90%)
    toxic_class: Optional[int] = None  # toxicity class (0: non-toxic, 1: toxic)
|
||||
|
||||
|
||||
class OptimizeResponse(BaseModel):
    """优化响应"""

    # Class docstring kept verbatim: pydantic publishes it in the generated schema.
    smiles: str  # SMILES echoed back from the request
    target_organ: str  # organ the search was run for
    formulations: List[FormulationResult]  # results, in the order returned by the optimizer
    message: str  # human-readable summary
|
||||
|
||||
|
||||
class HealthResponse(BaseModel):
    """健康检查响应"""

    # Class docstring kept verbatim: pydantic publishes it in the generated schema.
    status: str  # "healthy" or "model_not_loaded"
    model_loaded: bool  # whether a model is currently held in memory
    device: str  # string form of the torch device in use
    available_organs: List[str]  # organs accepted by /optimize
|
||||
|
||||
|
||||
# ============ Global State ============
|
||||
|
||||
class ModelState:
    """Holder for the loaded model and its placement.

    All three attributes start as ``None`` and are filled in by ``lifespan``
    at startup.
    """

    model = None  # loaded model, or None before startup / after shutdown
    device = None  # torch.device chosen at startup
    model_path = None  # path the model was loaded from


# Single shared instance used by the lifespan hook and the endpoints.
state = ModelState()
|
||||
|
||||
|
||||
# ============ Lifespan ============
|
||||
|
||||
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan hook: load the model on startup, release it on shutdown.

    Startup picks a device (CUDA, then MPS, then CPU; the ``DEVICE``
    environment variable overrides the choice), resolves the checkpoint path
    (``MODEL_PATH`` environment variable, else ``MODELS_DIR/final/model.pt``)
    and stores the loaded model in the module-level ``state``.

    Args:
        app: The FastAPI application (unused; required by the lifespan protocol).

    Raises:
        Exception: Whatever ``load_model`` raises is logged and re-raised, so
            the server does not start without a model.
    """
    # Startup
    logger.info("Starting API server...")

    # Pick the device
    if torch.cuda.is_available():
        device_str = "cuda"
    elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
        device_str = "mps"
    else:
        device_str = "cpu"

    # The environment variable wins over auto-detection
    device_str = os.environ.get("DEVICE", device_str)
    state.device = torch.device(device_str)
    logger.info(f"Using device: {state.device}")

    # Load the model
    model_path = Path(os.environ.get("MODEL_PATH", MODELS_DIR / "final" / "model.pt"))
    state.model_path = model_path

    logger.info(f"Loading model from {model_path}...")
    try:
        state.model = load_model(model_path, state.device)
    except Exception as e:
        logger.error(f"Failed to load model: {e}")
        raise
    logger.success("Model loaded successfully!")

    try:
        yield
    finally:
        # Shutdown: in a finally block so the model is released even when an
        # exception propagates through the yield.
        logger.info("Shutting down API server...")
        state.model = None
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
|
||||
|
||||
|
||||
# ============ FastAPI App ============
|
||||
|
||||
app = FastAPI(
    title="LNP 配方优化 API",
    description="基于深度学习的 LNP 纳米颗粒配方优化服务",
    version="1.0.0",
    lifespan=lifespan,
)

# CORS configuration.
# Allowed origins come from the comma-separated CORS_ALLOW_ORIGINS environment
# variable and default to "*" (any origin). Credentialed requests are only
# enabled when explicit origins are configured: a wildcard origin together with
# credentials is rejected by browsers and would otherwise expose the API to
# credentialed requests from any site.
_cors_origins = [
    origin.strip()
    for origin in os.environ.get("CORS_ALLOW_ORIGINS", "*").split(",")
    if origin.strip()
] or ["*"]
_cors_wildcard = "*" in _cors_origins

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"] if _cors_wildcard else _cors_origins,
    allow_credentials=not _cors_wildcard,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
||||
|
||||
|
||||
# ============ Endpoints ============
|
||||
|
||||
@app.get("/", response_model=HealthResponse)
async def health_check():
    """健康检查"""
    # Docstring kept verbatim: FastAPI publishes it as the endpoint description.
    model_ready = state.model is not None
    status_text = "healthy" if model_ready else "model_not_loaded"
    return HealthResponse(
        status=status_text,
        model_loaded=model_ready,
        device=str(state.device),
        available_organs=AVAILABLE_ORGANS,
    )
|
||||
|
||||
|
||||
@app.get("/organs", response_model=List[str])
async def get_available_organs():
    """获取可用的目标器官列表"""
    # Docstring kept verbatim: FastAPI publishes it as the endpoint description.
    # The list is the AVAILABLE_ORGANS constant imported from app.optimize.
    organs = AVAILABLE_ORGANS
    return organs
|
||||
|
||||
|
||||
@app.post("/optimize", response_model=OptimizeResponse)
async def optimize_formulation(request: OptimizeRequest):
    """
    执行配方优化

    通过迭代式 Grid Search 寻找最大化目标器官 Biodistribution 的最优配方。
    """
    # Docstring kept verbatim: FastAPI publishes it as the endpoint description.
    #
    # Flow: validate the request (every validation failure is a 400 raised
    # before the try block), run the search, then map each result onto a
    # FormulationResult. Any exception from the search itself becomes a 500.
    # NOTE(review): optimize() is called synchronously inside this async
    # handler, so the event loop is blocked while a search runs.

    # Model must be loaded
    if state.model is None:
        raise HTTPException(status_code=503, detail="Model not loaded")

    # Organ must be known
    if request.organ not in AVAILABLE_ORGANS:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid organ: {request.organ}. Available: {AVAILABLE_ORGANS}"
        )

    # SMILES must be non-blank; the stripped form is what the optimizer receives
    smiles = request.smiles.strip() if request.smiles else ""
    if not smiles:
        raise HTTPException(status_code=400, detail="SMILES string cannot be empty")

    # Routes, when given, must be a non-empty subset of the supported ones
    valid_routes = ["intravenous", "intramuscular"]
    if request.routes is not None:
        for r in request.routes:
            if r not in valid_routes:
                raise HTTPException(
                    status_code=400,
                    detail=f"Invalid route: {r}. Available: {valid_routes}"
                )
        if len(request.routes) == 0:
            raise HTTPException(status_code=400, detail="At least one route must be specified")

    # Step sizes, when given, must be a non-empty list of positive numbers.
    # `not (s > 0)` also rejects NaN.
    if request.step_sizes is not None:
        if len(request.step_sizes) == 0:
            raise HTTPException(status_code=400, detail="At least one step size must be specified")
        if any(not (s > 0) for s in request.step_sizes):
            raise HTTPException(status_code=400, detail="Step sizes must be positive")

    logger.info(f"Optimization request: organ={request.organ}, routes={request.routes}, smiles={request.smiles[:50]}...")

    # Component ranges are validated outside the try block so that a bad
    # configuration yields a 400 rather than a 500
    comp_ranges = None
    if request.comp_ranges is not None:
        comp_ranges = request.comp_ranges.to_comp_ranges()
        validation_error = comp_ranges.get_validation_error()
        if validation_error:
            raise HTTPException(
                status_code=400,
                detail=f"组分范围配置无效: {validation_error}"
            )

    # Scoring weights
    scoring_weights = None
    if request.scoring_weights is not None:
        scoring_weights = request.scoring_weights.to_scoring_weights()

    try:
        # Run the optimization (hierarchical search strategy)
        results = optimize(
            smiles=smiles,
            organ=request.organ,
            model=state.model,
            device=state.device,
            top_k=request.top_k,
            num_seeds=request.num_seeds,
            top_per_seed=request.top_per_seed,
            step_sizes=request.step_sizes,
            comp_ranges=comp_ranges,
            routes=request.routes,
            scoring_weights=scoring_weights,
            batch_size=256,
        )

        # Weights used to compute the composite score reported per result
        from app.optimize import compute_formulation_score, DEFAULT_SCORING_WEIGHTS
        actual_scoring_weights = scoring_weights if scoring_weights is not None else DEFAULT_SCORING_WEIGHTS

        # Convert optimizer results into response models; rank is 1-based
        formulations = [
            FormulationResult(
                rank=position,
                target_biodist=f.get_biodist(request.organ),
                composite_score=compute_formulation_score(f, request.organ, actual_scoring_weights),
                cationic_lipid_to_mrna_ratio=f.cationic_lipid_to_mrna_ratio,
                cationic_lipid_mol_ratio=f.cationic_lipid_mol_ratio,
                phospholipid_mol_ratio=f.phospholipid_mol_ratio,
                cholesterol_mol_ratio=f.cholesterol_mol_ratio,
                peg_lipid_mol_ratio=f.peg_lipid_mol_ratio,
                helper_lipid=f.helper_lipid,
                route=f.route,
                all_biodist={
                    col.replace("Biodistribution_", ""): f.biodist_predictions.get(col, 0.0)
                    for col in TARGET_BIODIST
                },
                # Additional predictions
                quantified_delivery=f.quantified_delivery,
                size=f.size,
                pdi_class=f.pdi_class,
                ee_class=f.ee_class,
                toxic_class=f.toxic_class,
            )
            for position, f in enumerate(results, start=1)
        ]

        logger.success(f"Optimization completed: {len(formulations)} formulations")

        return OptimizeResponse(
            smiles=request.smiles,
            target_organ=request.organ,
            formulations=formulations,
            message=f"Successfully found top {len(formulations)} formulations for {request.organ}",
        )

    except Exception as e:
        # logger.exception records the traceback along with the message
        logger.exception(f"Optimization failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Direct-run entry point: serves this module's app on all interfaces,
    # port 8000, with auto-reload enabled.
    import uvicorn

    uvicorn.run("app.api:app", host="0.0.0.0", port=8000, reload=True)
|
||||
|
||||
770
app/app.py
Normal file
770
app/app.py
Normal file
@ -0,0 +1,770 @@
|
||||
"""
|
||||
Streamlit 配方优化交互界面
|
||||
|
||||
启动应用:
|
||||
streamlit run app/app.py
|
||||
|
||||
Docker 环境变量:
|
||||
API_URL: API 服务地址 (默认: http://localhost:8000)
|
||||
"""
|
||||
|
||||
import io
|
||||
import os
|
||||
from datetime import datetime
|
||||
|
||||
import httpx
|
||||
import pandas as pd
|
||||
import streamlit as st
|
||||
|
||||
# ============ Configuration ============

# Read the API base URL from the environment so it can be overridden
# (e.g. inside Docker); falls back to a local server.
API_URL = os.environ.get("API_URL", "http://localhost:8000")

# Organ identifiers offered in the target-organ selector.
AVAILABLE_ORGANS = [
    "liver",
    "spleen",
    "lung",
    "heart",
    "kidney",
    "muscle",
    "lymph_nodes",
]

# Display label for each organ identifier (Chinese name followed by the English name).
ORGAN_LABELS = {
    "liver": "肝脏 (Liver)",
    "spleen": "脾脏 (Spleen)",
    "lung": "肺 (Lung)",
    "heart": "心脏 (Heart)",
    "kidney": "肾脏 (Kidney)",
    "muscle": "肌肉 (Muscle)",
    "lymph_nodes": "淋巴结 (Lymph Nodes)",
}

# Administration routes offered in the route multi-select.
AVAILABLE_ROUTES = [
    "intravenous",
    "intramuscular",
]

# Display label for each administration route.
ROUTE_LABELS = {
    "intravenous": "静脉注射 (Intravenous)",
    "intramuscular": "肌肉注射 (Intramuscular)",
}
|
||||
|
||||
# ============ Page configuration ============

# Browser tab title/icon and a wide layout with the sidebar open by default.
st.set_page_config(
    page_title="LNP 配方优化",
    page_icon="🧬",
    layout="wide",
    initial_sidebar_state="expanded",
)

# ============ Custom styles ============

# Inject a <style> element; unsafe_allow_html is needed so Streamlit renders
# the raw HTML instead of escaping it.
st.markdown("""
<style>
    /* 主标题样式 */
    .main-title {
        font-size: 2.5rem;
        font-weight: 700;
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        -webkit-background-clip: text;
        -webkit-text-fill-color: transparent;
        text-align: center;
        margin-bottom: 0.5rem;
    }

    /* 副标题样式 */
    .sub-title {
        font-size: 1.1rem;
        color: #6c757d;
        text-align: center;
        margin-bottom: 2rem;
    }

    /* 结果卡片 */
    .result-card {
        background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
        border-radius: 12px;
        padding: 1.5rem;
        margin-bottom: 1rem;
    }

    /* 指标高亮 */
    .metric-highlight {
        font-size: 2rem;
        font-weight: 700;
        color: #667eea;
    }

    /* 侧边栏样式 */
    .sidebar-section {
        background: #f8f9fa;
        border-radius: 8px;
        padding: 1rem;
        margin-bottom: 1rem;
    }

    /* 状态指示器 */
    .status-online {
        color: #28a745;
        font-weight: 600;
    }

    .status-offline {
        color: #dc3545;
        font-weight: 600;
    }

    /* 表格样式优化 */
    .dataframe {
        font-size: 0.85rem;
    }
</style>
""", unsafe_allow_html=True)
|
||||
|
||||
|
||||
# ============ 辅助函数 ============
|
||||
|
||||
def check_api_status() -> bool:
    """Report whether the backend API is reachable.

    Sends a GET request to the API root (``API_URL + "/"``) with a 5 second
    timeout.

    Returns:
        True if the request completes with HTTP status 200; False for any
        other status code or if the request fails for any reason.
    """
    try:
        with httpx.Client(timeout=5) as client:
            response = client.get(f"{API_URL}/")
            return response.status_code == 200
    except Exception:
        # Best-effort probe: any failure (connection refused, timeout, bad URL)
        # just means "offline". Catch Exception rather than using a bare
        # except so KeyboardInterrupt / SystemExit still propagate.
        return False
|
||||
|
||||
|
||||
def call_optimize_api(
    smiles: str,
    organ: str,
    top_k: int = 20,
    num_seeds: int = None,
    top_per_seed: int = 1,
    step_sizes: list = None,
    comp_ranges: dict = None,
    routes: list = None,
    scoring_weights: dict = None,
) -> dict:
    """POST an optimization request to the API and return the decoded JSON reply.

    Every argument is forwarded unchanged as a same-named field of the JSON
    body sent to ``{API_URL}/optimize``; arguments left as None are sent as
    JSON null.

    Returns:
        The response body parsed from JSON.

    Raises:
        httpx.HTTPStatusError: if the API answers with a 4xx/5xx status
            (raised by ``raise_for_status``).
    """
    endpoint = f"{API_URL}/optimize"
    request_body = dict(
        smiles=smiles,
        organ=organ,
        top_k=top_k,
        num_seeds=num_seeds,
        top_per_seed=top_per_seed,
        step_sizes=step_sizes,
        comp_ranges=comp_ranges,
        routes=routes,
        scoring_weights=scoring_weights,
    )

    # 10 minute timeout: runs with custom parameters may take a long time.
    with httpx.Client(timeout=600) as session:
        reply = session.post(endpoint, json=request_body)
        reply.raise_for_status()
        return reply.json()
|
||||
|
||||
|
||||
# Display labels for the PDI class index.
PDI_CLASS_LABELS = {
    0: "<0.2 (优)",
    1: "0.2-0.3 (良)",
    2: "0.3-0.4 (中)",
    3: ">0.4 (差)",
}

# Display labels for the EE (encapsulation efficiency) class index.
EE_CLASS_LABELS = {
    0: "<50% (低)",
    1: "50-80% (中)",
    2: ">80% (高)",
}

# Display labels for the toxicity class index.
TOXIC_CLASS_LABELS = {
    0: "无毒 ✓",
    1: "有毒 ⚠",
}
|
||||
|
||||
|
||||
def format_results_dataframe(results: dict, smiles_label: str = None) -> pd.DataFrame:
    """Flatten an optimize-API response into a display table.

    Args:
        results: API response; must contain ``"formulations"`` (a list of
            per-formulation dicts) and ``"target_organ"``.
        smiles_label: When truthy, written to a leading ``"SMILES"`` column
            on every row.

    Returns:
        A DataFrame with one row per formulation. Numeric predictions are
        rendered as formatted strings; optional predictions whose value is
        missing or None get no entry in that row.
    """
    entries = results["formulations"]
    target_organ = results["target_organ"]

    def build_row(entry: dict) -> dict:
        record = {}

        # Optional leading column identifying the molecule.
        if smiles_label:
            record["SMILES"] = smiles_label

        record["排名"] = entry["rank"]

        # The composite score, when present, sits right after the rank.
        composite = entry.get("composite_score")
        if composite is not None:
            record["综合评分"] = f"{composite:.4f}"

        record[f"{target_organ}分布"] = f"{entry['target_biodist']*100:.8f}%"
        record["阳离子脂质/mRNA比例"] = entry["cationic_lipid_to_mrna_ratio"]
        record["阳离子脂质(mol)比例"] = entry["cationic_lipid_mol_ratio"]
        record["磷脂(mol)比例"] = entry["phospholipid_mol_ratio"]
        record["胆固醇(mol)比例"] = entry["cholesterol_mol_ratio"]
        record["PEG脂质(mol)比例"] = entry["peg_lipid_mol_ratio"]
        record["辅助脂质"] = entry["helper_lipid"]
        record["给药途径"] = entry["route"]

        # Extra predictions, each only when a value is available.
        delivery = entry.get("quantified_delivery")
        if delivery is not None:
            record["量化递送"] = f"{delivery:.4f}"

        particle_size = entry.get("size")
        if particle_size is not None:
            record["粒径(nm)"] = f"{particle_size:.1f}"

        pdi = entry.get("pdi_class")
        if pdi is not None:
            record["PDI"] = PDI_CLASS_LABELS.get(pdi, str(pdi))

        ee = entry.get("ee_class")
        if ee is not None:
            record["包封率"] = EE_CLASS_LABELS.get(ee, str(ee))

        toxicity = entry.get("toxic_class")
        if toxicity is not None:
            record["毒性"] = TOXIC_CLASS_LABELS.get(toxicity, str(toxicity))

        # Biodistribution of every organ other than the target (which
        # already has its own column above).
        record.update(
            (f"{organ}分布", f"{value*100:.2f}%")
            for organ, value in entry["all_biodist"].items()
            if organ != target_organ
        )
        return record

    return pd.DataFrame([build_row(entry) for entry in entries])
|
||||
|
||||
|
||||
def create_export_csv(df: pd.DataFrame, smiles: str, organ: str) -> str:
    """Build the text of the downloadable CSV file.

    The output begins with four ``#``-prefixed metadata lines (title, SMILES,
    target organ, export timestamp from the local clock), then a blank line,
    then ``df`` serialized as CSV without the index column.
    """
    exported_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    header_lines = [
        "# LNP 配方优化结果",
        f"# SMILES: {smiles}",
        f"# 目标器官: {organ}",
        f"# 导出时间: {exported_at}",
    ]
    # One newline ends the last header line, the second leaves a blank line.
    preamble = "\n".join(header_lines) + "\n\n"
    table_text = df.to_csv(index=False)
    return preamble + table_text
|
||||
|
||||
|
||||
# ============ 主界面 ============
|
||||
|
||||
def main():
    """Render the Streamlit page: sidebar inputs, optimization run, result table.

    Flow:
      1. Probe the API and draw the sidebar (SMILES input, target organ,
         administration routes, advanced options, run button).
      2. When the button is pressed, call the optimize API once per
         comma-separated SMILES, collecting results and per-SMILES errors.
      3. Store successful results in ``st.session_state`` so they survive
         reruns, then show summary metrics, a CSV download and the table.
    """
    # Title
    st.markdown('<h1 class="main-title">🧬 LNP 配方优化系统</h1>', unsafe_allow_html=True)
    st.markdown('<p class="sub-title">基于深度学习的脂质纳米颗粒配方智能优选</p>', unsafe_allow_html=True)

    # Check API status
    api_online = check_api_status()

    # ========== Sidebar ==========
    with st.sidebar:
        # st.header("⚙️ 参数设置")

        # API status
        if api_online:
            st.success("🟢 API 服务在线")
        else:
            st.error("🔴 API 服务离线")
            st.info("请先启动 API 服务:\n```\nuvicorn app.api:app --port 8000\n```")

        # st.divider()

        # SMILES input
        st.subheader("🔬 分子结构")
        smiles_input = st.text_area(
            "输入阳离子脂质 SMILES",
            value="",
            height=100,
            placeholder="例如: CC(C)NCCNC(C)C\n多条SMILES用英文逗号分隔: SMI1,SMI2,SMI3",
            help="输入阳离子脂质的 SMILES 字符串。支持多条 SMILES,用英文逗号 (,) 分隔",
        )

        # Example SMILES (disabled)
        # with st.expander("📋 示例 SMILES"):
        #     example_smiles = {
        #         "DLin-MC3-DMA": "CC(C)=CCCC(C)=CCCC(C)=CCN(C)CCCCCCCCOC(=O)CCCCCCC/C=C\\CCCCCCCC",
        #         "简单胺": "CC(C)NCCNC(C)C",
        #         "长链胺": "CCCCCCCCCCCCNCCNCCCCCCCCCCCC",
        #     }
        #     for name, smi in example_smiles.items():
        #         if st.button(f"使用 {name}", key=f"example_{name}"):
        #             st.session_state["smiles_input"] = smi
        #             st.rerun()

        # st.divider()

        # Target organ selection
        st.subheader("🎯 目标器官")
        selected_organ = st.selectbox(
            "选择优化目标器官",
            options=AVAILABLE_ORGANS,
            format_func=lambda x: ORGAN_LABELS.get(x, x),
            index=0,
        )

        # Administration route selection
        st.subheader("💉 给药途径")
        selected_routes = st.multiselect(
            "选择给药途径",
            options=AVAILABLE_ROUTES,
            default=AVAILABLE_ROUTES,
            format_func=lambda x: ROUTE_LABELS.get(x, x),
            help="选择要搜索的给药途径,可多选。至少选择一种。",
        )
        if not selected_routes:
            st.warning("⚠️ 请至少选择一种给药途径")

        # Advanced options
        with st.expander("🔧 高级选项"):
            st.markdown("**输出设置**")
            top_k = st.slider(
                "返回配方数量 (top_k)",
                min_value=5,
                max_value=100,
                value=20,
                step=5,
                help="最终返回的最优配方数量",
            )

            st.markdown("**搜索策略**")
            # NOTE(review): the default is top_k * 5, which exceeds max_value
            # once top_k > 40 — confirm how the slider should behave then.
            num_seeds = st.slider(
                "种子点数量 (num_seeds)",
                min_value=10,
                max_value=200,
                value=top_k * 5,
                step=10,
                help="第一轮迭代后保留的种子点数量,更多种子点意味着更广泛的搜索",
            )

            top_per_seed = st.slider(
                "每个种子的局部最优数 (top_per_seed)",
                min_value=1,
                max_value=5,
                value=1,
                step=1,
                help="后续迭代中,每个种子点邻域保留的局部最优数量",
            )

            st.markdown("**迭代步长与轮数**")
            use_custom_steps = st.checkbox(
                "自定义迭代步长",
                value=False,
                help="默认步长为 [0.10, 0.02, 0.01],共3轮逐步精细化搜索。将某轮步长设为0可减少迭代轮数。",
            )

            if use_custom_steps:
                col1, col2, col3 = st.columns(3)
                with col1:
                    step1 = st.number_input(
                        "第1轮步长",
                        min_value=0.01, max_value=0.20, value=0.10,
                        step=0.01, format="%.2f",
                        help="第1轮为全局粗搜索,步长必须大于0",
                    )
                with col2:
                    step2 = st.number_input(
                        "第2轮步长",
                        min_value=0.00, max_value=0.10, value=0.02,
                        step=0.01, format="%.2f",
                        help="设为0则只进行1轮搜索",
                    )
                with col3:
                    step3 = st.number_input(
                        "第3轮步长",
                        min_value=0.00, max_value=0.05, value=0.01,
                        step=0.01, format="%.2f",
                        help="设为0则只进行2轮搜索",
                    )

                # Build the actual step_sizes list from the entered values:
                #   step2 == 0       -> [step1]               (1 round; step3 is ignored)
                #   step3 == 0       -> [step1, step2]        (2 rounds)
                #   neither is 0     -> [step1, step2, step3] (3 rounds)
                if step2 == 0.0:
                    step_sizes = [step1]
                elif step3 == 0.0:
                    step_sizes = [step1, step2]
                else:
                    step_sizes = [step1, step2, step3]

                # Show the effective number of rounds
                st.caption(f"📌 实际迭代轮数: {len(step_sizes)} 轮,步长: {step_sizes}")
            else:
                step_sizes = None  # use the API default

            st.markdown("**组分范围限制**")
            use_custom_ranges = st.checkbox(
                "自定义组分取值范围",
                value=False,
                help="限制各组分的取值范围(mol 比例加起来仍为 100%)",
            )

            if use_custom_ranges:
                st.caption("阳离子脂质/mRNA 重量比")
                col1, col2 = st.columns(2)
                with col1:
                    weight_ratio_min = st.number_input("最小", min_value=0.01, max_value=0.50, value=0.05, step=0.01, format="%.2f", key="wr_min")
                with col2:
                    weight_ratio_max = st.number_input("最大", min_value=0.01, max_value=0.50, value=0.30, step=0.01, format="%.2f", key="wr_max")

                st.caption("阳离子脂质 mol 比例")
                col1, col2 = st.columns(2)
                with col1:
                    cationic_mol_min = st.number_input("最小", min_value=0.00, max_value=1.00, value=0.05, step=0.05, format="%.2f", key="cat_min")
                with col2:
                    cationic_mol_max = st.number_input("最大", min_value=0.00, max_value=1.00, value=0.80, step=0.05, format="%.2f", key="cat_max")

                st.caption("磷脂 mol 比例")
                col1, col2 = st.columns(2)
                with col1:
                    phospholipid_mol_min = st.number_input("最小", min_value=0.00, max_value=1.00, value=0.00, step=0.05, format="%.2f", key="phos_min")
                with col2:
                    phospholipid_mol_max = st.number_input("最大", min_value=0.00, max_value=1.00, value=0.80, step=0.05, format="%.2f", key="phos_max")

                st.caption("胆固醇 mol 比例")
                col1, col2 = st.columns(2)
                with col1:
                    cholesterol_mol_min = st.number_input("最小", min_value=0.00, max_value=1.00, value=0.00, step=0.05, format="%.2f", key="chol_min")
                with col2:
                    cholesterol_mol_max = st.number_input("最大", min_value=0.00, max_value=1.00, value=0.80, step=0.05, format="%.2f", key="chol_max")

                st.caption("PEG 脂质 mol 比例")
                col1, col2 = st.columns(2)
                with col1:
                    peg_mol_min = st.number_input("最小", min_value=0.00, max_value=0.20, value=0.00, step=0.01, format="%.2f", key="peg_min")
                with col2:
                    peg_mol_max = st.number_input("最大", min_value=0.00, max_value=0.20, value=0.05, step=0.01, format="%.2f", key="peg_max")

                comp_ranges = {
                    "weight_ratio_min": weight_ratio_min,
                    "weight_ratio_max": weight_ratio_max,
                    "cationic_mol_min": cationic_mol_min,
                    "cationic_mol_max": cationic_mol_max,
                    "phospholipid_mol_min": phospholipid_mol_min,
                    "phospholipid_mol_max": phospholipid_mol_max,
                    "cholesterol_mol_min": cholesterol_mol_min,
                    "cholesterol_mol_max": cholesterol_mol_max,
                    "peg_mol_min": peg_mol_min,
                    "peg_mol_max": peg_mol_max,
                }

                # Simple feasibility check: the four mol ratios must be able
                # to sum to exactly 1.
                min_sum = cationic_mol_min + phospholipid_mol_min + cholesterol_mol_min + peg_mol_min
                max_sum = cationic_mol_max + phospholipid_mol_max + cholesterol_mol_max + peg_mol_max
                if min_sum > 1.0 or max_sum < 1.0:
                    st.warning("⚠️ 当前范围设置可能无法生成有效配方(mol 比例需加起来为 100%)")
            else:
                comp_ranges = None  # use the API default

            st.markdown("**评分/排序权重**")
            use_custom_scoring = st.checkbox(
                "自定义评分权重",
                value=False,
                help="默认仅按目标器官分布排序。开启后可自定义多目标加权评分,总分 = 各项score之和。",
            )

            if use_custom_scoring:
                st.caption("**回归任务权重**")

                sw_biodist = st.number_input(
                    "器官分布 (Biodistribution)",
                    min_value=0.00, max_value=10.00, value=0.30,
                    step=0.05, format="%.2f", key="sw_biodist",
                    help="score = biodist_value × weight",
                )
                sw_delivery = st.number_input(
                    "量化递送 (Quantified Delivery)",
                    min_value=0.00, max_value=10.00, value=0.25,
                    step=0.05, format="%.2f", key="sw_delivery",
                    help="score = normalize(delivery, route) × weight",
                )
                # NOTE(review): the label says 80-150nm but the help text says
                # 60≤size≤150 — confirm against the scoring implementation.
                sw_size = st.number_input(
                    "粒径 (Size, 80-150nm)",
                    min_value=0.00, max_value=10.00, value=0.05,
                    step=0.05, format="%.2f", key="sw_size",
                    help="score = (1 if 60≤size≤150 else 0) × weight",
                )

                st.caption("**包封率 (EE) 分类权重**")
                col1, col2, col3 = st.columns(3)
                with col1:
                    sw_ee0 = st.number_input("<50% (低)", min_value=0.00, max_value=1.00, value=0.00, step=0.01, format="%.2f", key="sw_ee0")
                with col2:
                    sw_ee1 = st.number_input("50-80% (中)", min_value=0.00, max_value=1.00, value=0.02, step=0.01, format="%.2f", key="sw_ee1")
                with col3:
                    sw_ee2 = st.number_input(">80% (高)", min_value=0.00, max_value=1.00, value=0.08, step=0.01, format="%.2f", key="sw_ee2")

                st.caption("**PDI 分类权重**")
                col1, col2, col3, col4 = st.columns(4)
                with col1:
                    sw_pdi0 = st.number_input("<0.2 (优)", min_value=0.00, max_value=1.00, value=0.08, step=0.01, format="%.2f", key="sw_pdi0")
                with col2:
                    sw_pdi1 = st.number_input("0.2-0.3 (良)", min_value=0.00, max_value=1.00, value=0.02, step=0.01, format="%.2f", key="sw_pdi1")
                with col3:
                    sw_pdi2 = st.number_input("0.3-0.4 (中)", min_value=0.00, max_value=1.00, value=0.00, step=0.01, format="%.2f", key="sw_pdi2")
                with col4:
                    sw_pdi3 = st.number_input(">0.4 (差)", min_value=0.00, max_value=1.00, value=0.00, step=0.01, format="%.2f", key="sw_pdi3")

                st.caption("**毒性分类权重**")
                col1, col2 = st.columns(2)
                with col1:
                    sw_toxic0 = st.number_input("无毒", min_value=0.00, max_value=1.00, value=0.20, step=0.05, format="%.2f", key="sw_toxic0")
                with col2:
                    sw_toxic1 = st.number_input("有毒", min_value=0.00, max_value=1.00, value=0.00, step=0.05, format="%.2f", key="sw_toxic1")

                scoring_weights = {
                    "biodist_weight": sw_biodist,
                    "delivery_weight": sw_delivery,
                    "size_weight": sw_size,
                    "ee_class_weights": [sw_ee0, sw_ee1, sw_ee2],
                    "pdi_class_weights": [sw_pdi0, sw_pdi1, sw_pdi2, sw_pdi3],
                    "toxic_class_weights": [sw_toxic0, sw_toxic1],
                }
            else:
                scoring_weights = None  # use the API default

        st.divider()

        # Run button: disabled while the API is offline, no SMILES has been
        # entered, or no route is selected.
        optimize_button = st.button(
            "🚀 开始配方优选",
            type="primary",
            use_container_width=True,
            disabled=not api_online or not smiles_input.strip() or not selected_routes,
        )

    # ========== Main content area ==========

    # Results are kept in session state so they persist across reruns.
    if "results" not in st.session_state:
        st.session_state["results"] = None
    if "results_df" not in st.session_state:
        st.session_state["results_df"] = None

    # Run the optimization
    if optimize_button and smiles_input.strip():
        # Parse multiple SMILES (comma separated), dropping empty pieces.
        smiles_list = [s.strip() for s in smiles_input.split(",") if s.strip()]

        if not smiles_list:
            st.error("❌ 请输入有效的 SMILES 字符串")
        else:
            is_multi_smiles = len(smiles_list) > 1
            all_results = []
            all_dfs = []
            errors = []

            # Progress indicators
            progress_bar = st.progress(0)
            status_text = st.empty()

            for idx, smiles in enumerate(smiles_list):
                status_text.text(f"🔄 正在优化 SMILES {idx + 1}/{len(smiles_list)}...")
                progress_bar.progress((idx) / len(smiles_list))

                try:
                    results = call_optimize_api(
                        smiles=smiles,
                        organ=selected_organ,
                        top_k=top_k,
                        num_seeds=num_seeds,
                        top_per_seed=top_per_seed,
                        step_sizes=step_sizes,
                        comp_ranges=comp_ranges,
                        routes=selected_routes,
                        scoring_weights=scoring_weights,
                    )
                    all_results.append({"smiles": smiles, "results": results})

                    # In multi-SMILES mode tag every row with a (truncated) SMILES label.
                    smiles_label = smiles[:30] + "..." if len(smiles) > 30 else smiles
                    df = format_results_dataframe(results, smiles_label if is_multi_smiles else None)
                    all_dfs.append(df)

                except httpx.HTTPStatusError as e:
                    # Prefer the API's own "detail" message; fall back to the
                    # exception text if the body is not a JSON object.
                    try:
                        error_detail = e.response.json().get("detail", str(e))
                    except Exception:
                        # Exception (not a bare except) so KeyboardInterrupt /
                        # SystemExit are not swallowed.
                        error_detail = str(e)
                    errors.append(f"SMILES {idx + 1}: {error_detail}")
                except httpx.RequestError as e:
                    errors.append(f"SMILES {idx + 1}: API 连接失败 - {e}")
                except Exception as e:
                    # One failing SMILES must not abort the remaining ones.
                    errors.append(f"SMILES {idx + 1}: {e}")

            progress_bar.progress(1.0)
            status_text.empty()
            progress_bar.empty()

            # Show errors
            for err in errors:
                st.error(f"❌ {err}")

            # Save results. A single success is stored as the bare response
            # dict; several successes are stored as a list of
            # {"smiles", "results"} dicts.
            if all_results:
                st.session_state["results"] = all_results[0]["results"] if len(all_results) == 1 else all_results
                st.session_state["results_df"] = pd.concat(all_dfs, ignore_index=True) if all_dfs else None
                st.session_state["smiles_used"] = smiles_list
                st.session_state["organ_used"] = selected_organ
                st.session_state["is_multi_smiles"] = is_multi_smiles
                st.success(f"✅ 优化完成!成功处理 {len(all_results)}/{len(smiles_list)} 条 SMILES")

    # Show results
    if st.session_state["results"] is not None and st.session_state["results_df"] is not None:
        results = st.session_state["results"]
        df = st.session_state["results_df"]
        is_multi_smiles = st.session_state.get("is_multi_smiles", False)

        # Result overview
        if is_multi_smiles:
            # Multi-SMILES mode. `results` may still be a single dict when
            # only one of several SMILES succeeded, hence the isinstance checks.
            col1, col2, col3 = st.columns(3)

            with col1:
                # Take target_organ from the first result
                first_result = results[0]["results"] if isinstance(results, list) else results
                target_organ = first_result["target_organ"]
                st.metric(
                    "目标器官",
                    ORGAN_LABELS.get(target_organ, target_organ).split(" ")[0],
                )

            with col2:
                st.metric(
                    "SMILES 数量",
                    len(results) if isinstance(results, list) else 1,
                )

            with col3:
                st.metric(
                    "总配方数",
                    len(df),
                )
        else:
            # Single-SMILES mode
            col1, col2, col3 = st.columns(3)

            with col1:
                st.metric(
                    "目标器官",
                    ORGAN_LABELS.get(results["target_organ"], results["target_organ"]).split(" ")[0],
                )

            with col2:
                # Guard against an empty formulation list: indexing [0] would
                # raise IndexError, and because the results live in session
                # state the page would keep crashing on every rerun.
                formulations = results["formulations"]
                if formulations:
                    best_score = formulations[0]["target_biodist"]
                    best_label = f"{best_score*100:.2f}%"
                else:
                    best_label = "N/A"
                st.metric(
                    "最优分布",
                    best_label,
                )

            with col3:
                st.metric(
                    "优选配方数",
                    len(results["formulations"]),
                )

        st.divider()

        # Result table
        st.subheader("📊 优选配方列表")

        # Export button row
        col_export, col_spacer = st.columns([1, 4])
        with col_export:
            smiles_used = st.session_state.get("smiles_used", "")
            if isinstance(smiles_used, list):
                smiles_used = ",".join(smiles_used)

            csv_content = create_export_csv(
                df,
                smiles_used,
                st.session_state.get("organ_used", ""),
            )

            # Resolve target_organ for the download file name
            if is_multi_smiles:
                target_organ = results[0]["results"]["target_organ"] if isinstance(results, list) else results["target_organ"]
            else:
                target_organ = results["target_organ"]

            st.download_button(
                label="📥 导出 CSV",
                data=csv_content,
                file_name=f"lnp_optimization_{target_organ}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
                mime="text/csv",
            )

        # Show the table
        st.dataframe(
            df,
            use_container_width=True,
            hide_index=True,
            height=600,
        )

        # Details of the best formulation (disabled)
        # with st.expander("🔍 查看最优配方详情"):
        #     best = results["formulations"][0]
        #
        #     col1, col2 = st.columns(2)
        #
        #     with col1:
        #         st.markdown("**配方参数**")
        #         st.json({
        #             "阳离子脂质/mRNA 比例": best["cationic_lipid_to_mrna_ratio"],
        #             "阳离子脂质 (mol%)": best["cationic_lipid_mol_ratio"],
        #             "磷脂 (mol%)": best["phospholipid_mol_ratio"],
        #             "胆固醇 (mol%)": best["cholesterol_mol_ratio"],
        #             "PEG 脂质 (mol%)": best["peg_lipid_mol_ratio"],
        #             "辅助脂质": best["helper_lipid"],
        #             "给药途径": best["route"],
        #         })
        #
        #     with col2:
        #         st.markdown("**各器官 Biodistribution 预测**")
        #         biodist_df = pd.DataFrame([
        #             {"器官": ORGAN_LABELS.get(k, k), "Biodistribution": f"{v:.4f}"}
        #             for k, v in best["all_biodist"].items()
        #         ])
        #         st.dataframe(biodist_df, hide_index=True, use_container_width=True)

    else:
        # Welcome message
        st.info("👈 请在左侧输入 SMILES 并选择目标器官,然后点击「开始配方优选」")
|
||||
|
||||
# 使用说明
|
||||
# with st.expander("📖 使用说明"):
|
||||
# st.markdown("""
|
||||
# ### 如何使用
|
||||
|
||||
# 1. **输入 SMILES**: 在左侧输入框中输入阳离子脂质的 SMILES 字符串
|
||||
# 2. **选择目标器官**: 选择您希望优化的器官靶向
|
||||
# 3. **点击优选**: 系统将自动搜索最优配方组合
|
||||
# 4. **查看结果**: 右侧将显示 Top-20 优选配方
|
||||
# 5. **导出数据**: 点击导出按钮将结果保存为 CSV 文件
|
||||
|
||||
# ### 优化参数
|
||||
|
||||
# 系统会优化以下配方参数:
|
||||
# - **阳离子脂质/mRNA 比例**: 0.05 - 0.30
|
||||
# - **阳离子脂质 mol 比例**: 0.05 - 0.80
|
||||
# - **磷脂 mol 比例**: 0.00 - 0.80
|
||||
# - **胆固醇 mol 比例**: 0.00 - 0.80
|
||||
# - **PEG 脂质 mol 比例**: 0.00 - 0.05
|
||||
# - **辅助脂质**: DOPE / DSPC / DOTAP
|
||||
# - **给药途径**: 静脉注射 / 肌肉注射
|
||||
|
||||
# ### 约束条件
|
||||
|
||||
# mol 比例之和 = 1 (阳离子脂质 + 磷脂 + 胆固醇 + PEG 脂质)
|
||||
# """)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Render the page when the file is executed as a script
    # (the module docstring's launch command is `streamlit run app/app.py`).
    main()
|
||||
|
||||
1016
app/optimize.py
Normal file
1016
app/optimize.py
Normal file
File diff suppressed because it is too large
Load Diff
165
data/external/all_amine_split_for_LiON/cv_0/args.json
vendored
Normal file
165
data/external/all_amine_split_for_LiON/cv_0/args.json
vendored
Normal file
@ -0,0 +1,165 @@
|
||||
{
|
||||
"activation": "ReLU",
|
||||
"adding_bond_types": true,
|
||||
"adding_h": false,
|
||||
"aggregation": "mean",
|
||||
"aggregation_norm": 100,
|
||||
"atom_constraints": [],
|
||||
"atom_descriptor_scaling": true,
|
||||
"atom_descriptors": null,
|
||||
"atom_descriptors_path": null,
|
||||
"atom_descriptors_size": 0,
|
||||
"atom_features_size": 0,
|
||||
"atom_messages": false,
|
||||
"atom_targets": [],
|
||||
"batch_size": 50,
|
||||
"bias": false,
|
||||
"bias_solvent": false,
|
||||
"bond_constraints": [],
|
||||
"bond_descriptor_scaling": true,
|
||||
"bond_descriptors": null,
|
||||
"bond_descriptors_path": null,
|
||||
"bond_descriptors_size": 0,
|
||||
"bond_features_size": 0,
|
||||
"bond_targets": [],
|
||||
"cache_cutoff": 10000,
|
||||
"checkpoint_dir": null,
|
||||
"checkpoint_frzn": null,
|
||||
"checkpoint_path": null,
|
||||
"checkpoint_paths": null,
|
||||
"class_balance": false,
|
||||
"config_path": "../data/args_files/optimized_configs.json",
|
||||
"constraints_path": null,
|
||||
"crossval_index_dir": null,
|
||||
"crossval_index_file": null,
|
||||
"crossval_index_sets": null,
|
||||
"cuda": true,
|
||||
"data_path": "../data/crossval_splits/all_amine_split_for_paper/cv_0/train.csv",
|
||||
"data_weights_path": "../data/crossval_splits/all_amine_split_for_paper/cv_0/train_weights.csv",
|
||||
"dataset_type": "regression",
|
||||
"depth": 4,
|
||||
"depth_solvent": 3,
|
||||
"device": {
|
||||
"_string": "cuda",
|
||||
"_type": "python_object (type = device)",
|
||||
"_value": "gASVHwAAAAAAAACMBXRvcmNolIwGZGV2aWNllJOUjARjdWRhlIWUUpQu"
|
||||
},
|
||||
"dropout": 0.1,
|
||||
"empty_cache": false,
|
||||
"ensemble_size": 1,
|
||||
"epochs": 50,
|
||||
"evidential_regularization": 0,
|
||||
"explicit_h": false,
|
||||
"extra_metrics": [],
|
||||
"features_generator": null,
|
||||
"features_only": false,
|
||||
"features_path": [
|
||||
"../data/crossval_splits/all_amine_split_for_paper/cv_0/train_extra_x.csv"
|
||||
],
|
||||
"features_scaling": true,
|
||||
"features_size": null,
|
||||
"ffn_hidden_size": 600,
|
||||
"ffn_num_layers": 3,
|
||||
"final_lr": 0.0001,
|
||||
"folds_file": null,
|
||||
"freeze_first_only": false,
|
||||
"frzn_ffn_layers": 0,
|
||||
"gpu": null,
|
||||
"grad_clip": null,
|
||||
"hidden_size": 600,
|
||||
"hidden_size_solvent": 300,
|
||||
"ignore_columns": null,
|
||||
"ignore_nan_metrics": false,
|
||||
"init_lr": 0.0001,
|
||||
"is_atom_bond_targets": false,
|
||||
"keeping_atom_map": false,
|
||||
"log_frequency": 10,
|
||||
"loss_function": "mse",
|
||||
"max_data_size": null,
|
||||
"max_lr": 0.001,
|
||||
"metric": "rmse",
|
||||
"metrics": [
|
||||
"rmse"
|
||||
],
|
||||
"minimize_score": true,
|
||||
"mpn_shared": false,
|
||||
"multiclass_num_classes": 3,
|
||||
"no_adding_bond_types": false,
|
||||
"no_atom_descriptor_scaling": false,
|
||||
"no_bond_descriptor_scaling": false,
|
||||
"no_cache_mol": false,
|
||||
"no_cuda": false,
|
||||
"no_features_scaling": false,
|
||||
"no_shared_atom_bond_ffn": false,
|
||||
"num_folds": 1,
|
||||
"num_lrs": 1,
|
||||
"num_tasks": 1,
|
||||
"num_workers": 8,
|
||||
"number_of_molecules": 1,
|
||||
"overwrite_default_atom_features": false,
|
||||
"overwrite_default_bond_features": false,
|
||||
"phase_features_path": null,
|
||||
"pytorch_seed": 0,
|
||||
"quantile_loss_alpha": 0.1,
|
||||
"quantiles": [],
|
||||
"quiet": false,
|
||||
"reaction": false,
|
||||
"reaction_mode": "reac_diff",
|
||||
"reaction_solvent": false,
|
||||
"reproducibility": {
|
||||
"command_line": "python main_script.py train all_amine_split_for_paper",
|
||||
"git_has_uncommitted_changes": true,
|
||||
"git_root": "/media/andersonxps/wd_4tb/evan/LNP_ML",
|
||||
"git_url": "https://github.com/jswitten/LNP_ML/tree/167822980dc26ba65c5c14539c4ce12b81b0b8f3",
|
||||
"time": "Tue Jul 30 10:15:25 2024"
|
||||
},
|
||||
"resume_experiment": false,
|
||||
"save_dir": "../data/crossval_splits/all_amine_split_for_paper/cv_0",
|
||||
"save_preds": false,
|
||||
"save_smiles_splits": false,
|
||||
"seed": 42,
|
||||
"separate_test_atom_descriptors_path": null,
|
||||
"separate_test_bond_descriptors_path": null,
|
||||
"separate_test_constraints_path": null,
|
||||
"separate_test_features_path": [
|
||||
"../data/crossval_splits/all_amine_split_for_paper/cv_0/test_extra_x.csv"
|
||||
],
|
||||
"separate_test_path": "../data/crossval_splits/all_amine_split_for_paper/cv_0/test.csv",
|
||||
"separate_test_phase_features_path": null,
|
||||
"separate_val_atom_descriptors_path": null,
|
||||
"separate_val_bond_descriptors_path": null,
|
||||
"separate_val_constraints_path": null,
|
||||
"separate_val_features_path": [
|
||||
"../data/crossval_splits/all_amine_split_for_paper/cv_0/valid_extra_x.csv"
|
||||
],
|
||||
"separate_val_path": "../data/crossval_splits/all_amine_split_for_paper/cv_0/valid.csv",
|
||||
"separate_val_phase_features_path": null,
|
||||
"shared_atom_bond_ffn": true,
|
||||
"show_individual_scores": false,
|
||||
"smiles_columns": [
|
||||
"smiles"
|
||||
],
|
||||
"spectra_activation": "exp",
|
||||
"spectra_phase_mask_path": null,
|
||||
"spectra_target_floor": 1e-08,
|
||||
"split_key_molecule": 0,
|
||||
"split_sizes": [
|
||||
1.0,
|
||||
0.0,
|
||||
0.0
|
||||
],
|
||||
"split_type": "random",
|
||||
"target_columns": null,
|
||||
"target_weights": null,
|
||||
"task_names": [
|
||||
"quantified_delivery"
|
||||
],
|
||||
"test": false,
|
||||
"test_fold_index": null,
|
||||
"train_data_size": null,
|
||||
"undirected": false,
|
||||
"use_input_features": true,
|
||||
"val_fold_index": null,
|
||||
"warmup_epochs": 2.0,
|
||||
"weights_ffn_num_layers": 2
|
||||
}
|
||||
Binary file not shown.
3
data/external/all_amine_split_for_LiON/cv_0/fold_0/model_0/model.pt
vendored
Normal file
3
data/external/all_amine_split_for_LiON/cv_0/fold_0/model_0/model.pt
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:3b6dcfdd1b82a685b007ed06ad323defdb7486b24917c1ec515dbd2c5b927f08
|
||||
size 6540631
|
||||
5
data/external/all_amine_split_for_LiON/cv_0/fold_0/test_scores.json
vendored
Normal file
5
data/external/all_amine_split_for_LiON/cv_0/fold_0/test_scores.json
vendored
Normal file
@ -0,0 +1,5 @@
|
||||
{
|
||||
"rmse": [
|
||||
0.8880622451903801
|
||||
]
|
||||
}
|
||||
2038
data/external/all_amine_split_for_LiON/cv_0/preds.csv
vendored
Normal file
2038
data/external/all_amine_split_for_LiON/cv_0/preds.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
2038
data/external/all_amine_split_for_LiON/cv_0/test.csv
vendored
Normal file
2038
data/external/all_amine_split_for_LiON/cv_0/test.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
2038
data/external/all_amine_split_for_LiON/cv_0/test_extra_x.csv
vendored
Normal file
2038
data/external/all_amine_split_for_LiON/cv_0/test_extra_x.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
2038
data/external/all_amine_split_for_LiON/cv_0/test_metadata.csv
vendored
Normal file
2038
data/external/all_amine_split_for_LiON/cv_0/test_metadata.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
2
data/external/all_amine_split_for_LiON/cv_0/test_scores.csv
vendored
Normal file
2
data/external/all_amine_split_for_LiON/cv_0/test_scores.csv
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
Task,Mean rmse,Standard deviation rmse,Fold 0 rmse
|
||||
quantified_delivery,0.8880622451903801,0.0,0.8880622451903801
|
||||
|
2038
data/external/all_amine_split_for_LiON/cv_0/test_weights.csv
vendored
Normal file
2038
data/external/all_amine_split_for_LiON/cv_0/test_weights.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
5943
data/external/all_amine_split_for_LiON/cv_0/train.csv
vendored
Normal file
5943
data/external/all_amine_split_for_LiON/cv_0/train.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
5943
data/external/all_amine_split_for_LiON/cv_0/train_extra_x.csv
vendored
Normal file
5943
data/external/all_amine_split_for_LiON/cv_0/train_extra_x.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
5943
data/external/all_amine_split_for_LiON/cv_0/train_metadata.csv
vendored
Normal file
5943
data/external/all_amine_split_for_LiON/cv_0/train_metadata.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
5943
data/external/all_amine_split_for_LiON/cv_0/train_weights.csv
vendored
Normal file
5943
data/external/all_amine_split_for_LiON/cv_0/train_weights.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1659
data/external/all_amine_split_for_LiON/cv_0/valid.csv
vendored
Normal file
1659
data/external/all_amine_split_for_LiON/cv_0/valid.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1659
data/external/all_amine_split_for_LiON/cv_0/valid_extra_x.csv
vendored
Normal file
1659
data/external/all_amine_split_for_LiON/cv_0/valid_extra_x.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1659
data/external/all_amine_split_for_LiON/cv_0/valid_metadata.csv
vendored
Normal file
1659
data/external/all_amine_split_for_LiON/cv_0/valid_metadata.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1659
data/external/all_amine_split_for_LiON/cv_0/valid_weights.csv
vendored
Normal file
1659
data/external/all_amine_split_for_LiON/cv_0/valid_weights.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
165
data/external/all_amine_split_for_LiON/cv_1/args.json
vendored
Normal file
165
data/external/all_amine_split_for_LiON/cv_1/args.json
vendored
Normal file
@ -0,0 +1,165 @@
|
||||
{
|
||||
"activation": "ReLU",
|
||||
"adding_bond_types": true,
|
||||
"adding_h": false,
|
||||
"aggregation": "mean",
|
||||
"aggregation_norm": 100,
|
||||
"atom_constraints": [],
|
||||
"atom_descriptor_scaling": true,
|
||||
"atom_descriptors": null,
|
||||
"atom_descriptors_path": null,
|
||||
"atom_descriptors_size": 0,
|
||||
"atom_features_size": 0,
|
||||
"atom_messages": false,
|
||||
"atom_targets": [],
|
||||
"batch_size": 50,
|
||||
"bias": false,
|
||||
"bias_solvent": false,
|
||||
"bond_constraints": [],
|
||||
"bond_descriptor_scaling": true,
|
||||
"bond_descriptors": null,
|
||||
"bond_descriptors_path": null,
|
||||
"bond_descriptors_size": 0,
|
||||
"bond_features_size": 0,
|
||||
"bond_targets": [],
|
||||
"cache_cutoff": 10000,
|
||||
"checkpoint_dir": null,
|
||||
"checkpoint_frzn": null,
|
||||
"checkpoint_path": null,
|
||||
"checkpoint_paths": null,
|
||||
"class_balance": false,
|
||||
"config_path": "../data/args_files/optimized_configs.json",
|
||||
"constraints_path": null,
|
||||
"crossval_index_dir": null,
|
||||
"crossval_index_file": null,
|
||||
"crossval_index_sets": null,
|
||||
"cuda": true,
|
||||
"data_path": "../data/crossval_splits/all_amine_split_for_paper/cv_1/train.csv",
|
||||
"data_weights_path": "../data/crossval_splits/all_amine_split_for_paper/cv_1/train_weights.csv",
|
||||
"dataset_type": "regression",
|
||||
"depth": 4,
|
||||
"depth_solvent": 3,
|
||||
"device": {
|
||||
"_string": "cuda",
|
||||
"_type": "python_object (type = device)",
|
||||
"_value": "gASVHwAAAAAAAACMBXRvcmNolIwGZGV2aWNllJOUjARjdWRhlIWUUpQu"
|
||||
},
|
||||
"dropout": 0.1,
|
||||
"empty_cache": false,
|
||||
"ensemble_size": 1,
|
||||
"epochs": 50,
|
||||
"evidential_regularization": 0,
|
||||
"explicit_h": false,
|
||||
"extra_metrics": [],
|
||||
"features_generator": null,
|
||||
"features_only": false,
|
||||
"features_path": [
|
||||
"../data/crossval_splits/all_amine_split_for_paper/cv_1/train_extra_x.csv"
|
||||
],
|
||||
"features_scaling": true,
|
||||
"features_size": null,
|
||||
"ffn_hidden_size": 600,
|
||||
"ffn_num_layers": 3,
|
||||
"final_lr": 0.0001,
|
||||
"folds_file": null,
|
||||
"freeze_first_only": false,
|
||||
"frzn_ffn_layers": 0,
|
||||
"gpu": null,
|
||||
"grad_clip": null,
|
||||
"hidden_size": 600,
|
||||
"hidden_size_solvent": 300,
|
||||
"ignore_columns": null,
|
||||
"ignore_nan_metrics": false,
|
||||
"init_lr": 0.0001,
|
||||
"is_atom_bond_targets": false,
|
||||
"keeping_atom_map": false,
|
||||
"log_frequency": 10,
|
||||
"loss_function": "mse",
|
||||
"max_data_size": null,
|
||||
"max_lr": 0.001,
|
||||
"metric": "rmse",
|
||||
"metrics": [
|
||||
"rmse"
|
||||
],
|
||||
"minimize_score": true,
|
||||
"mpn_shared": false,
|
||||
"multiclass_num_classes": 3,
|
||||
"no_adding_bond_types": false,
|
||||
"no_atom_descriptor_scaling": false,
|
||||
"no_bond_descriptor_scaling": false,
|
||||
"no_cache_mol": false,
|
||||
"no_cuda": false,
|
||||
"no_features_scaling": false,
|
||||
"no_shared_atom_bond_ffn": false,
|
||||
"num_folds": 1,
|
||||
"num_lrs": 1,
|
||||
"num_tasks": 1,
|
||||
"num_workers": 8,
|
||||
"number_of_molecules": 1,
|
||||
"overwrite_default_atom_features": false,
|
||||
"overwrite_default_bond_features": false,
|
||||
"phase_features_path": null,
|
||||
"pytorch_seed": 0,
|
||||
"quantile_loss_alpha": 0.1,
|
||||
"quantiles": [],
|
||||
"quiet": false,
|
||||
"reaction": false,
|
||||
"reaction_mode": "reac_diff",
|
||||
"reaction_solvent": false,
|
||||
"reproducibility": {
|
||||
"command_line": "python main_script.py train all_amine_split_for_paper",
|
||||
"git_has_uncommitted_changes": true,
|
||||
"git_root": "/media/andersonxps/wd_4tb/evan/LNP_ML",
|
||||
"git_url": "https://github.com/jswitten/LNP_ML/tree/167822980dc26ba65c5c14539c4ce12b81b0b8f3",
|
||||
"time": "Tue Jul 30 10:21:40 2024"
|
||||
},
|
||||
"resume_experiment": false,
|
||||
"save_dir": "../data/crossval_splits/all_amine_split_for_paper/cv_1",
|
||||
"save_preds": false,
|
||||
"save_smiles_splits": false,
|
||||
"seed": 42,
|
||||
"separate_test_atom_descriptors_path": null,
|
||||
"separate_test_bond_descriptors_path": null,
|
||||
"separate_test_constraints_path": null,
|
||||
"separate_test_features_path": [
|
||||
"../data/crossval_splits/all_amine_split_for_paper/cv_1/test_extra_x.csv"
|
||||
],
|
||||
"separate_test_path": "../data/crossval_splits/all_amine_split_for_paper/cv_1/test.csv",
|
||||
"separate_test_phase_features_path": null,
|
||||
"separate_val_atom_descriptors_path": null,
|
||||
"separate_val_bond_descriptors_path": null,
|
||||
"separate_val_constraints_path": null,
|
||||
"separate_val_features_path": [
|
||||
"../data/crossval_splits/all_amine_split_for_paper/cv_1/valid_extra_x.csv"
|
||||
],
|
||||
"separate_val_path": "../data/crossval_splits/all_amine_split_for_paper/cv_1/valid.csv",
|
||||
"separate_val_phase_features_path": null,
|
||||
"shared_atom_bond_ffn": true,
|
||||
"show_individual_scores": false,
|
||||
"smiles_columns": [
|
||||
"smiles"
|
||||
],
|
||||
"spectra_activation": "exp",
|
||||
"spectra_phase_mask_path": null,
|
||||
"spectra_target_floor": 1e-08,
|
||||
"split_key_molecule": 0,
|
||||
"split_sizes": [
|
||||
1.0,
|
||||
0.0,
|
||||
0.0
|
||||
],
|
||||
"split_type": "random",
|
||||
"target_columns": null,
|
||||
"target_weights": null,
|
||||
"task_names": [
|
||||
"quantified_delivery"
|
||||
],
|
||||
"test": false,
|
||||
"test_fold_index": null,
|
||||
"train_data_size": null,
|
||||
"undirected": false,
|
||||
"use_input_features": true,
|
||||
"val_fold_index": null,
|
||||
"warmup_epochs": 2.0,
|
||||
"weights_ffn_num_layers": 2
|
||||
}
|
||||
Binary file not shown.
3
data/external/all_amine_split_for_LiON/cv_1/fold_0/model_0/model.pt
vendored
Normal file
3
data/external/all_amine_split_for_LiON/cv_1/fold_0/model_0/model.pt
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:6518259648eb75f0717f93d800048f25eeb8dec9fca13d7f1c02235c2ef8bda8
|
||||
size 6540631
|
||||
5
data/external/all_amine_split_for_LiON/cv_1/fold_0/test_scores.json
vendored
Normal file
5
data/external/all_amine_split_for_LiON/cv_1/fold_0/test_scores.json
vendored
Normal file
@ -0,0 +1,5 @@
|
||||
{
|
||||
"rmse": [
|
||||
1.01673724295223
|
||||
]
|
||||
}
|
||||
1659
data/external/all_amine_split_for_LiON/cv_1/preds.csv
vendored
Normal file
1659
data/external/all_amine_split_for_LiON/cv_1/preds.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1659
data/external/all_amine_split_for_LiON/cv_1/test.csv
vendored
Normal file
1659
data/external/all_amine_split_for_LiON/cv_1/test.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1659
data/external/all_amine_split_for_LiON/cv_1/test_extra_x.csv
vendored
Normal file
1659
data/external/all_amine_split_for_LiON/cv_1/test_extra_x.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1659
data/external/all_amine_split_for_LiON/cv_1/test_metadata.csv
vendored
Normal file
1659
data/external/all_amine_split_for_LiON/cv_1/test_metadata.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
2
data/external/all_amine_split_for_LiON/cv_1/test_scores.csv
vendored
Normal file
2
data/external/all_amine_split_for_LiON/cv_1/test_scores.csv
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
Task,Mean rmse,Standard deviation rmse,Fold 0 rmse
|
||||
quantified_delivery,1.01673724295223,0.0,1.01673724295223
|
||||
|
1659
data/external/all_amine_split_for_LiON/cv_1/test_weights.csv
vendored
Normal file
1659
data/external/all_amine_split_for_LiON/cv_1/test_weights.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
6365
data/external/all_amine_split_for_LiON/cv_1/train.csv
vendored
Normal file
6365
data/external/all_amine_split_for_LiON/cv_1/train.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
6365
data/external/all_amine_split_for_LiON/cv_1/train_extra_x.csv
vendored
Normal file
6365
data/external/all_amine_split_for_LiON/cv_1/train_extra_x.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
6365
data/external/all_amine_split_for_LiON/cv_1/train_metadata.csv
vendored
Normal file
6365
data/external/all_amine_split_for_LiON/cv_1/train_metadata.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
6365
data/external/all_amine_split_for_LiON/cv_1/train_weights.csv
vendored
Normal file
6365
data/external/all_amine_split_for_LiON/cv_1/train_weights.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1616
data/external/all_amine_split_for_LiON/cv_1/valid.csv
vendored
Normal file
1616
data/external/all_amine_split_for_LiON/cv_1/valid.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1616
data/external/all_amine_split_for_LiON/cv_1/valid_extra_x.csv
vendored
Normal file
1616
data/external/all_amine_split_for_LiON/cv_1/valid_extra_x.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1616
data/external/all_amine_split_for_LiON/cv_1/valid_metadata.csv
vendored
Normal file
1616
data/external/all_amine_split_for_LiON/cv_1/valid_metadata.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1616
data/external/all_amine_split_for_LiON/cv_1/valid_weights.csv
vendored
Normal file
1616
data/external/all_amine_split_for_LiON/cv_1/valid_weights.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
165
data/external/all_amine_split_for_LiON/cv_2/args.json
vendored
Normal file
165
data/external/all_amine_split_for_LiON/cv_2/args.json
vendored
Normal file
@ -0,0 +1,165 @@
|
||||
{
|
||||
"activation": "ReLU",
|
||||
"adding_bond_types": true,
|
||||
"adding_h": false,
|
||||
"aggregation": "mean",
|
||||
"aggregation_norm": 100,
|
||||
"atom_constraints": [],
|
||||
"atom_descriptor_scaling": true,
|
||||
"atom_descriptors": null,
|
||||
"atom_descriptors_path": null,
|
||||
"atom_descriptors_size": 0,
|
||||
"atom_features_size": 0,
|
||||
"atom_messages": false,
|
||||
"atom_targets": [],
|
||||
"batch_size": 50,
|
||||
"bias": false,
|
||||
"bias_solvent": false,
|
||||
"bond_constraints": [],
|
||||
"bond_descriptor_scaling": true,
|
||||
"bond_descriptors": null,
|
||||
"bond_descriptors_path": null,
|
||||
"bond_descriptors_size": 0,
|
||||
"bond_features_size": 0,
|
||||
"bond_targets": [],
|
||||
"cache_cutoff": 10000,
|
||||
"checkpoint_dir": null,
|
||||
"checkpoint_frzn": null,
|
||||
"checkpoint_path": null,
|
||||
"checkpoint_paths": null,
|
||||
"class_balance": false,
|
||||
"config_path": "../data/args_files/optimized_configs.json",
|
||||
"constraints_path": null,
|
||||
"crossval_index_dir": null,
|
||||
"crossval_index_file": null,
|
||||
"crossval_index_sets": null,
|
||||
"cuda": true,
|
||||
"data_path": "../data/crossval_splits/all_amine_split_for_paper/cv_2/train.csv",
|
||||
"data_weights_path": "../data/crossval_splits/all_amine_split_for_paper/cv_2/train_weights.csv",
|
||||
"dataset_type": "regression",
|
||||
"depth": 4,
|
||||
"depth_solvent": 3,
|
||||
"device": {
|
||||
"_string": "cuda",
|
||||
"_type": "python_object (type = device)",
|
||||
"_value": "gASVHwAAAAAAAACMBXRvcmNolIwGZGV2aWNllJOUjARjdWRhlIWUUpQu"
|
||||
},
|
||||
"dropout": 0.1,
|
||||
"empty_cache": false,
|
||||
"ensemble_size": 1,
|
||||
"epochs": 50,
|
||||
"evidential_regularization": 0,
|
||||
"explicit_h": false,
|
||||
"extra_metrics": [],
|
||||
"features_generator": null,
|
||||
"features_only": false,
|
||||
"features_path": [
|
||||
"../data/crossval_splits/all_amine_split_for_paper/cv_2/train_extra_x.csv"
|
||||
],
|
||||
"features_scaling": true,
|
||||
"features_size": null,
|
||||
"ffn_hidden_size": 600,
|
||||
"ffn_num_layers": 3,
|
||||
"final_lr": 0.0001,
|
||||
"folds_file": null,
|
||||
"freeze_first_only": false,
|
||||
"frzn_ffn_layers": 0,
|
||||
"gpu": null,
|
||||
"grad_clip": null,
|
||||
"hidden_size": 600,
|
||||
"hidden_size_solvent": 300,
|
||||
"ignore_columns": null,
|
||||
"ignore_nan_metrics": false,
|
||||
"init_lr": 0.0001,
|
||||
"is_atom_bond_targets": false,
|
||||
"keeping_atom_map": false,
|
||||
"log_frequency": 10,
|
||||
"loss_function": "mse",
|
||||
"max_data_size": null,
|
||||
"max_lr": 0.001,
|
||||
"metric": "rmse",
|
||||
"metrics": [
|
||||
"rmse"
|
||||
],
|
||||
"minimize_score": true,
|
||||
"mpn_shared": false,
|
||||
"multiclass_num_classes": 3,
|
||||
"no_adding_bond_types": false,
|
||||
"no_atom_descriptor_scaling": false,
|
||||
"no_bond_descriptor_scaling": false,
|
||||
"no_cache_mol": false,
|
||||
"no_cuda": false,
|
||||
"no_features_scaling": false,
|
||||
"no_shared_atom_bond_ffn": false,
|
||||
"num_folds": 1,
|
||||
"num_lrs": 1,
|
||||
"num_tasks": 1,
|
||||
"num_workers": 8,
|
||||
"number_of_molecules": 1,
|
||||
"overwrite_default_atom_features": false,
|
||||
"overwrite_default_bond_features": false,
|
||||
"phase_features_path": null,
|
||||
"pytorch_seed": 0,
|
||||
"quantile_loss_alpha": 0.1,
|
||||
"quantiles": [],
|
||||
"quiet": false,
|
||||
"reaction": false,
|
||||
"reaction_mode": "reac_diff",
|
||||
"reaction_solvent": false,
|
||||
"reproducibility": {
|
||||
"command_line": "python main_script.py train all_amine_split_for_paper",
|
||||
"git_has_uncommitted_changes": true,
|
||||
"git_root": "/media/andersonxps/wd_4tb/evan/LNP_ML",
|
||||
"git_url": "https://github.com/jswitten/LNP_ML/tree/167822980dc26ba65c5c14539c4ce12b81b0b8f3",
|
||||
"time": "Tue Jul 30 10:28:04 2024"
|
||||
},
|
||||
"resume_experiment": false,
|
||||
"save_dir": "../data/crossval_splits/all_amine_split_for_paper/cv_2",
|
||||
"save_preds": false,
|
||||
"save_smiles_splits": false,
|
||||
"seed": 42,
|
||||
"separate_test_atom_descriptors_path": null,
|
||||
"separate_test_bond_descriptors_path": null,
|
||||
"separate_test_constraints_path": null,
|
||||
"separate_test_features_path": [
|
||||
"../data/crossval_splits/all_amine_split_for_paper/cv_2/test_extra_x.csv"
|
||||
],
|
||||
"separate_test_path": "../data/crossval_splits/all_amine_split_for_paper/cv_2/test.csv",
|
||||
"separate_test_phase_features_path": null,
|
||||
"separate_val_atom_descriptors_path": null,
|
||||
"separate_val_bond_descriptors_path": null,
|
||||
"separate_val_constraints_path": null,
|
||||
"separate_val_features_path": [
|
||||
"../data/crossval_splits/all_amine_split_for_paper/cv_2/valid_extra_x.csv"
|
||||
],
|
||||
"separate_val_path": "../data/crossval_splits/all_amine_split_for_paper/cv_2/valid.csv",
|
||||
"separate_val_phase_features_path": null,
|
||||
"shared_atom_bond_ffn": true,
|
||||
"show_individual_scores": false,
|
||||
"smiles_columns": [
|
||||
"smiles"
|
||||
],
|
||||
"spectra_activation": "exp",
|
||||
"spectra_phase_mask_path": null,
|
||||
"spectra_target_floor": 1e-08,
|
||||
"split_key_molecule": 0,
|
||||
"split_sizes": [
|
||||
1.0,
|
||||
0.0,
|
||||
0.0
|
||||
],
|
||||
"split_type": "random",
|
||||
"target_columns": null,
|
||||
"target_weights": null,
|
||||
"task_names": [
|
||||
"quantified_delivery"
|
||||
],
|
||||
"test": false,
|
||||
"test_fold_index": null,
|
||||
"train_data_size": null,
|
||||
"undirected": false,
|
||||
"use_input_features": true,
|
||||
"val_fold_index": null,
|
||||
"warmup_epochs": 2.0,
|
||||
"weights_ffn_num_layers": 2
|
||||
}
|
||||
Binary file not shown.
3
data/external/all_amine_split_for_LiON/cv_2/fold_0/model_0/model.pt
vendored
Normal file
3
data/external/all_amine_split_for_LiON/cv_2/fold_0/model_0/model.pt
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:5df89f8f7b97d314e05171db5891a0fb2199f3b591dbc9d44680e77811520acb
|
||||
size 6540631
|
||||
5
data/external/all_amine_split_for_LiON/cv_2/fold_0/test_scores.json
vendored
Normal file
5
data/external/all_amine_split_for_LiON/cv_2/fold_0/test_scores.json
vendored
Normal file
@ -0,0 +1,5 @@
|
||||
{
|
||||
"rmse": [
|
||||
0.8788072588544181
|
||||
]
|
||||
}
|
||||
1616
data/external/all_amine_split_for_LiON/cv_2/preds.csv
vendored
Normal file
1616
data/external/all_amine_split_for_LiON/cv_2/preds.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1616
data/external/all_amine_split_for_LiON/cv_2/test.csv
vendored
Normal file
1616
data/external/all_amine_split_for_LiON/cv_2/test.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1616
data/external/all_amine_split_for_LiON/cv_2/test_extra_x.csv
vendored
Normal file
1616
data/external/all_amine_split_for_LiON/cv_2/test_extra_x.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1616
data/external/all_amine_split_for_LiON/cv_2/test_metadata.csv
vendored
Normal file
1616
data/external/all_amine_split_for_LiON/cv_2/test_metadata.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
2
data/external/all_amine_split_for_LiON/cv_2/test_scores.csv
vendored
Normal file
2
data/external/all_amine_split_for_LiON/cv_2/test_scores.csv
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
Task,Mean rmse,Standard deviation rmse,Fold 0 rmse
|
||||
quantified_delivery,0.8788072588544181,0.0,0.8788072588544181
|
||||
|
1616
data/external/all_amine_split_for_LiON/cv_2/test_weights.csv
vendored
Normal file
1616
data/external/all_amine_split_for_LiON/cv_2/test_weights.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
6269
data/external/all_amine_split_for_LiON/cv_2/train.csv
vendored
Normal file
6269
data/external/all_amine_split_for_LiON/cv_2/train.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
6269
data/external/all_amine_split_for_LiON/cv_2/train_extra_x.csv
vendored
Normal file
6269
data/external/all_amine_split_for_LiON/cv_2/train_extra_x.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
6269
data/external/all_amine_split_for_LiON/cv_2/train_metadata.csv
vendored
Normal file
6269
data/external/all_amine_split_for_LiON/cv_2/train_metadata.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
6269
data/external/all_amine_split_for_LiON/cv_2/train_weights.csv
vendored
Normal file
6269
data/external/all_amine_split_for_LiON/cv_2/train_weights.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1755
data/external/all_amine_split_for_LiON/cv_2/valid.csv
vendored
Normal file
1755
data/external/all_amine_split_for_LiON/cv_2/valid.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1755
data/external/all_amine_split_for_LiON/cv_2/valid_extra_x.csv
vendored
Normal file
1755
data/external/all_amine_split_for_LiON/cv_2/valid_extra_x.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1755
data/external/all_amine_split_for_LiON/cv_2/valid_metadata.csv
vendored
Normal file
1755
data/external/all_amine_split_for_LiON/cv_2/valid_metadata.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1755
data/external/all_amine_split_for_LiON/cv_2/valid_weights.csv
vendored
Normal file
1755
data/external/all_amine_split_for_LiON/cv_2/valid_weights.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
165
data/external/all_amine_split_for_LiON/cv_3/args.json
vendored
Normal file
165
data/external/all_amine_split_for_LiON/cv_3/args.json
vendored
Normal file
@ -0,0 +1,165 @@
|
||||
{
|
||||
"activation": "ReLU",
|
||||
"adding_bond_types": true,
|
||||
"adding_h": false,
|
||||
"aggregation": "mean",
|
||||
"aggregation_norm": 100,
|
||||
"atom_constraints": [],
|
||||
"atom_descriptor_scaling": true,
|
||||
"atom_descriptors": null,
|
||||
"atom_descriptors_path": null,
|
||||
"atom_descriptors_size": 0,
|
||||
"atom_features_size": 0,
|
||||
"atom_messages": false,
|
||||
"atom_targets": [],
|
||||
"batch_size": 50,
|
||||
"bias": false,
|
||||
"bias_solvent": false,
|
||||
"bond_constraints": [],
|
||||
"bond_descriptor_scaling": true,
|
||||
"bond_descriptors": null,
|
||||
"bond_descriptors_path": null,
|
||||
"bond_descriptors_size": 0,
|
||||
"bond_features_size": 0,
|
||||
"bond_targets": [],
|
||||
"cache_cutoff": 10000,
|
||||
"checkpoint_dir": null,
|
||||
"checkpoint_frzn": null,
|
||||
"checkpoint_path": null,
|
||||
"checkpoint_paths": null,
|
||||
"class_balance": false,
|
||||
"config_path": "../data/args_files/optimized_configs.json",
|
||||
"constraints_path": null,
|
||||
"crossval_index_dir": null,
|
||||
"crossval_index_file": null,
|
||||
"crossval_index_sets": null,
|
||||
"cuda": true,
|
||||
"data_path": "../data/crossval_splits/all_amine_split_for_paper/cv_3/train.csv",
|
||||
"data_weights_path": "../data/crossval_splits/all_amine_split_for_paper/cv_3/train_weights.csv",
|
||||
"dataset_type": "regression",
|
||||
"depth": 4,
|
||||
"depth_solvent": 3,
|
||||
"device": {
|
||||
"_string": "cuda",
|
||||
"_type": "python_object (type = device)",
|
||||
"_value": "gASVHwAAAAAAAACMBXRvcmNolIwGZGV2aWNllJOUjARjdWRhlIWUUpQu"
|
||||
},
|
||||
"dropout": 0.1,
|
||||
"empty_cache": false,
|
||||
"ensemble_size": 1,
|
||||
"epochs": 50,
|
||||
"evidential_regularization": 0,
|
||||
"explicit_h": false,
|
||||
"extra_metrics": [],
|
||||
"features_generator": null,
|
||||
"features_only": false,
|
||||
"features_path": [
|
||||
"../data/crossval_splits/all_amine_split_for_paper/cv_3/train_extra_x.csv"
|
||||
],
|
||||
"features_scaling": true,
|
||||
"features_size": null,
|
||||
"ffn_hidden_size": 600,
|
||||
"ffn_num_layers": 3,
|
||||
"final_lr": 0.0001,
|
||||
"folds_file": null,
|
||||
"freeze_first_only": false,
|
||||
"frzn_ffn_layers": 0,
|
||||
"gpu": null,
|
||||
"grad_clip": null,
|
||||
"hidden_size": 600,
|
||||
"hidden_size_solvent": 300,
|
||||
"ignore_columns": null,
|
||||
"ignore_nan_metrics": false,
|
||||
"init_lr": 0.0001,
|
||||
"is_atom_bond_targets": false,
|
||||
"keeping_atom_map": false,
|
||||
"log_frequency": 10,
|
||||
"loss_function": "mse",
|
||||
"max_data_size": null,
|
||||
"max_lr": 0.001,
|
||||
"metric": "rmse",
|
||||
"metrics": [
|
||||
"rmse"
|
||||
],
|
||||
"minimize_score": true,
|
||||
"mpn_shared": false,
|
||||
"multiclass_num_classes": 3,
|
||||
"no_adding_bond_types": false,
|
||||
"no_atom_descriptor_scaling": false,
|
||||
"no_bond_descriptor_scaling": false,
|
||||
"no_cache_mol": false,
|
||||
"no_cuda": false,
|
||||
"no_features_scaling": false,
|
||||
"no_shared_atom_bond_ffn": false,
|
||||
"num_folds": 1,
|
||||
"num_lrs": 1,
|
||||
"num_tasks": 1,
|
||||
"num_workers": 8,
|
||||
"number_of_molecules": 1,
|
||||
"overwrite_default_atom_features": false,
|
||||
"overwrite_default_bond_features": false,
|
||||
"phase_features_path": null,
|
||||
"pytorch_seed": 0,
|
||||
"quantile_loss_alpha": 0.1,
|
||||
"quantiles": [],
|
||||
"quiet": false,
|
||||
"reaction": false,
|
||||
"reaction_mode": "reac_diff",
|
||||
"reaction_solvent": false,
|
||||
"reproducibility": {
|
||||
"command_line": "python main_script.py train all_amine_split_for_paper",
|
||||
"git_has_uncommitted_changes": true,
|
||||
"git_root": "/media/andersonxps/wd_4tb/evan/LNP_ML",
|
||||
"git_url": "https://github.com/jswitten/LNP_ML/tree/167822980dc26ba65c5c14539c4ce12b81b0b8f3",
|
||||
"time": "Tue Jul 30 10:34:31 2024"
|
||||
},
|
||||
"resume_experiment": false,
|
||||
"save_dir": "../data/crossval_splits/all_amine_split_for_paper/cv_3",
|
||||
"save_preds": false,
|
||||
"save_smiles_splits": false,
|
||||
"seed": 42,
|
||||
"separate_test_atom_descriptors_path": null,
|
||||
"separate_test_bond_descriptors_path": null,
|
||||
"separate_test_constraints_path": null,
|
||||
"separate_test_features_path": [
|
||||
"../data/crossval_splits/all_amine_split_for_paper/cv_3/test_extra_x.csv"
|
||||
],
|
||||
"separate_test_path": "../data/crossval_splits/all_amine_split_for_paper/cv_3/test.csv",
|
||||
"separate_test_phase_features_path": null,
|
||||
"separate_val_atom_descriptors_path": null,
|
||||
"separate_val_bond_descriptors_path": null,
|
||||
"separate_val_constraints_path": null,
|
||||
"separate_val_features_path": [
|
||||
"../data/crossval_splits/all_amine_split_for_paper/cv_3/valid_extra_x.csv"
|
||||
],
|
||||
"separate_val_path": "../data/crossval_splits/all_amine_split_for_paper/cv_3/valid.csv",
|
||||
"separate_val_phase_features_path": null,
|
||||
"shared_atom_bond_ffn": true,
|
||||
"show_individual_scores": false,
|
||||
"smiles_columns": [
|
||||
"smiles"
|
||||
],
|
||||
"spectra_activation": "exp",
|
||||
"spectra_phase_mask_path": null,
|
||||
"spectra_target_floor": 1e-08,
|
||||
"split_key_molecule": 0,
|
||||
"split_sizes": [
|
||||
1.0,
|
||||
0.0,
|
||||
0.0
|
||||
],
|
||||
"split_type": "random",
|
||||
"target_columns": null,
|
||||
"target_weights": null,
|
||||
"task_names": [
|
||||
"quantified_delivery"
|
||||
],
|
||||
"test": false,
|
||||
"test_fold_index": null,
|
||||
"train_data_size": null,
|
||||
"undirected": false,
|
||||
"use_input_features": true,
|
||||
"val_fold_index": null,
|
||||
"warmup_epochs": 2.0,
|
||||
"weights_ffn_num_layers": 2
|
||||
}
|
||||
Binary file not shown.
3
data/external/all_amine_split_for_LiON/cv_3/fold_0/model_0/model.pt
vendored
Normal file
3
data/external/all_amine_split_for_LiON/cv_3/fold_0/model_0/model.pt
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e5ae4ef9d7980963742eb04c54cdf5fe3a16db9d95c22db273ad072413b651b3
|
||||
size 6540631
|
||||
5
data/external/all_amine_split_for_LiON/cv_3/fold_0/test_scores.json
vendored
Normal file
5
data/external/all_amine_split_for_LiON/cv_3/fold_0/test_scores.json
vendored
Normal file
@ -0,0 +1,5 @@
|
||||
{
|
||||
"rmse": [
|
||||
0.9245934905333985
|
||||
]
|
||||
}
|
||||
1755
data/external/all_amine_split_for_LiON/cv_3/preds.csv
vendored
Normal file
1755
data/external/all_amine_split_for_LiON/cv_3/preds.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1755
data/external/all_amine_split_for_LiON/cv_3/test.csv
vendored
Normal file
1755
data/external/all_amine_split_for_LiON/cv_3/test.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1755
data/external/all_amine_split_for_LiON/cv_3/test_extra_x.csv
vendored
Normal file
1755
data/external/all_amine_split_for_LiON/cv_3/test_extra_x.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1755
data/external/all_amine_split_for_LiON/cv_3/test_metadata.csv
vendored
Normal file
1755
data/external/all_amine_split_for_LiON/cv_3/test_metadata.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
2
data/external/all_amine_split_for_LiON/cv_3/test_scores.csv
vendored
Normal file
2
data/external/all_amine_split_for_LiON/cv_3/test_scores.csv
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
Task,Mean rmse,Standard deviation rmse,Fold 0 rmse
|
||||
quantified_delivery,0.9245934905333985,0.0,0.9245934905333985
|
||||
|
1755
data/external/all_amine_split_for_LiON/cv_3/test_weights.csv
vendored
Normal file
1755
data/external/all_amine_split_for_LiON/cv_3/test_weights.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
6364
data/external/all_amine_split_for_LiON/cv_3/train.csv
vendored
Normal file
6364
data/external/all_amine_split_for_LiON/cv_3/train.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
6364
data/external/all_amine_split_for_LiON/cv_3/train_extra_x.csv
vendored
Normal file
6364
data/external/all_amine_split_for_LiON/cv_3/train_extra_x.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
6364
data/external/all_amine_split_for_LiON/cv_3/train_metadata.csv
vendored
Normal file
6364
data/external/all_amine_split_for_LiON/cv_3/train_metadata.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
6364
data/external/all_amine_split_for_LiON/cv_3/train_weights.csv
vendored
Normal file
6364
data/external/all_amine_split_for_LiON/cv_3/train_weights.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1521
data/external/all_amine_split_for_LiON/cv_3/valid.csv
vendored
Normal file
1521
data/external/all_amine_split_for_LiON/cv_3/valid.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1521
data/external/all_amine_split_for_LiON/cv_3/valid_extra_x.csv
vendored
Normal file
1521
data/external/all_amine_split_for_LiON/cv_3/valid_extra_x.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1521
data/external/all_amine_split_for_LiON/cv_3/valid_metadata.csv
vendored
Normal file
1521
data/external/all_amine_split_for_LiON/cv_3/valid_metadata.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1521
data/external/all_amine_split_for_LiON/cv_3/valid_weights.csv
vendored
Normal file
1521
data/external/all_amine_split_for_LiON/cv_3/valid_weights.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
165
data/external/all_amine_split_for_LiON/cv_4/args.json
vendored
Normal file
165
data/external/all_amine_split_for_LiON/cv_4/args.json
vendored
Normal file
@ -0,0 +1,165 @@
|
||||
{
|
||||
"activation": "ReLU",
|
||||
"adding_bond_types": true,
|
||||
"adding_h": false,
|
||||
"aggregation": "mean",
|
||||
"aggregation_norm": 100,
|
||||
"atom_constraints": [],
|
||||
"atom_descriptor_scaling": true,
|
||||
"atom_descriptors": null,
|
||||
"atom_descriptors_path": null,
|
||||
"atom_descriptors_size": 0,
|
||||
"atom_features_size": 0,
|
||||
"atom_messages": false,
|
||||
"atom_targets": [],
|
||||
"batch_size": 50,
|
||||
"bias": false,
|
||||
"bias_solvent": false,
|
||||
"bond_constraints": [],
|
||||
"bond_descriptor_scaling": true,
|
||||
"bond_descriptors": null,
|
||||
"bond_descriptors_path": null,
|
||||
"bond_descriptors_size": 0,
|
||||
"bond_features_size": 0,
|
||||
"bond_targets": [],
|
||||
"cache_cutoff": 10000,
|
||||
"checkpoint_dir": null,
|
||||
"checkpoint_frzn": null,
|
||||
"checkpoint_path": null,
|
||||
"checkpoint_paths": null,
|
||||
"class_balance": false,
|
||||
"config_path": "../data/args_files/optimized_configs.json",
|
||||
"constraints_path": null,
|
||||
"crossval_index_dir": null,
|
||||
"crossval_index_file": null,
|
||||
"crossval_index_sets": null,
|
||||
"cuda": true,
|
||||
"data_path": "../data/crossval_splits/all_amine_split_for_paper/cv_4/train.csv",
|
||||
"data_weights_path": "../data/crossval_splits/all_amine_split_for_paper/cv_4/train_weights.csv",
|
||||
"dataset_type": "regression",
|
||||
"depth": 4,
|
||||
"depth_solvent": 3,
|
||||
"device": {
|
||||
"_string": "cuda",
|
||||
"_type": "python_object (type = device)",
|
||||
"_value": "gASVHwAAAAAAAACMBXRvcmNolIwGZGV2aWNllJOUjARjdWRhlIWUUpQu"
|
||||
},
|
||||
"dropout": 0.1,
|
||||
"empty_cache": false,
|
||||
"ensemble_size": 1,
|
||||
"epochs": 50,
|
||||
"evidential_regularization": 0,
|
||||
"explicit_h": false,
|
||||
"extra_metrics": [],
|
||||
"features_generator": null,
|
||||
"features_only": false,
|
||||
"features_path": [
|
||||
"../data/crossval_splits/all_amine_split_for_paper/cv_4/train_extra_x.csv"
|
||||
],
|
||||
"features_scaling": true,
|
||||
"features_size": null,
|
||||
"ffn_hidden_size": 600,
|
||||
"ffn_num_layers": 3,
|
||||
"final_lr": 0.0001,
|
||||
"folds_file": null,
|
||||
"freeze_first_only": false,
|
||||
"frzn_ffn_layers": 0,
|
||||
"gpu": null,
|
||||
"grad_clip": null,
|
||||
"hidden_size": 600,
|
||||
"hidden_size_solvent": 300,
|
||||
"ignore_columns": null,
|
||||
"ignore_nan_metrics": false,
|
||||
"init_lr": 0.0001,
|
||||
"is_atom_bond_targets": false,
|
||||
"keeping_atom_map": false,
|
||||
"log_frequency": 10,
|
||||
"loss_function": "mse",
|
||||
"max_data_size": null,
|
||||
"max_lr": 0.001,
|
||||
"metric": "rmse",
|
||||
"metrics": [
|
||||
"rmse"
|
||||
],
|
||||
"minimize_score": true,
|
||||
"mpn_shared": false,
|
||||
"multiclass_num_classes": 3,
|
||||
"no_adding_bond_types": false,
|
||||
"no_atom_descriptor_scaling": false,
|
||||
"no_bond_descriptor_scaling": false,
|
||||
"no_cache_mol": false,
|
||||
"no_cuda": false,
|
||||
"no_features_scaling": false,
|
||||
"no_shared_atom_bond_ffn": false,
|
||||
"num_folds": 1,
|
||||
"num_lrs": 1,
|
||||
"num_tasks": 1,
|
||||
"num_workers": 8,
|
||||
"number_of_molecules": 1,
|
||||
"overwrite_default_atom_features": false,
|
||||
"overwrite_default_bond_features": false,
|
||||
"phase_features_path": null,
|
||||
"pytorch_seed": 0,
|
||||
"quantile_loss_alpha": 0.1,
|
||||
"quantiles": [],
|
||||
"quiet": false,
|
||||
"reaction": false,
|
||||
"reaction_mode": "reac_diff",
|
||||
"reaction_solvent": false,
|
||||
"reproducibility": {
|
||||
"command_line": "python main_script.py train all_amine_split_for_paper",
|
||||
"git_has_uncommitted_changes": true,
|
||||
"git_root": "/media/andersonxps/wd_4tb/evan/LNP_ML",
|
||||
"git_url": "https://github.com/jswitten/LNP_ML/tree/167822980dc26ba65c5c14539c4ce12b81b0b8f3",
|
||||
"time": "Tue Jul 30 10:40:44 2024"
|
||||
},
|
||||
"resume_experiment": false,
|
||||
"save_dir": "../data/crossval_splits/all_amine_split_for_paper/cv_4",
|
||||
"save_preds": false,
|
||||
"save_smiles_splits": false,
|
||||
"seed": 42,
|
||||
"separate_test_atom_descriptors_path": null,
|
||||
"separate_test_bond_descriptors_path": null,
|
||||
"separate_test_constraints_path": null,
|
||||
"separate_test_features_path": [
|
||||
"../data/crossval_splits/all_amine_split_for_paper/cv_4/test_extra_x.csv"
|
||||
],
|
||||
"separate_test_path": "../data/crossval_splits/all_amine_split_for_paper/cv_4/test.csv",
|
||||
"separate_test_phase_features_path": null,
|
||||
"separate_val_atom_descriptors_path": null,
|
||||
"separate_val_bond_descriptors_path": null,
|
||||
"separate_val_constraints_path": null,
|
||||
"separate_val_features_path": [
|
||||
"../data/crossval_splits/all_amine_split_for_paper/cv_4/valid_extra_x.csv"
|
||||
],
|
||||
"separate_val_path": "../data/crossval_splits/all_amine_split_for_paper/cv_4/valid.csv",
|
||||
"separate_val_phase_features_path": null,
|
||||
"shared_atom_bond_ffn": true,
|
||||
"show_individual_scores": false,
|
||||
"smiles_columns": [
|
||||
"smiles"
|
||||
],
|
||||
"spectra_activation": "exp",
|
||||
"spectra_phase_mask_path": null,
|
||||
"spectra_target_floor": 1e-08,
|
||||
"split_key_molecule": 0,
|
||||
"split_sizes": [
|
||||
1.0,
|
||||
0.0,
|
||||
0.0
|
||||
],
|
||||
"split_type": "random",
|
||||
"target_columns": null,
|
||||
"target_weights": null,
|
||||
"task_names": [
|
||||
"quantified_delivery"
|
||||
],
|
||||
"test": false,
|
||||
"test_fold_index": null,
|
||||
"train_data_size": null,
|
||||
"undirected": false,
|
||||
"use_input_features": true,
|
||||
"val_fold_index": null,
|
||||
"warmup_epochs": 2.0,
|
||||
"weights_ffn_num_layers": 2
|
||||
}
|
||||
Binary file not shown.
3
data/external/all_amine_split_for_LiON/cv_4/fold_0/model_0/model.pt
vendored
Normal file
3
data/external/all_amine_split_for_LiON/cv_4/fold_0/model_0/model.pt
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:c69ab5f85cbca9dac0f871b1d6841a199cf40eeba2f46173eff9654a8f59bc8d
|
||||
size 6540631
|
||||
5
data/external/all_amine_split_for_LiON/cv_4/fold_0/test_scores.json
vendored
Normal file
5
data/external/all_amine_split_for_LiON/cv_4/fold_0/test_scores.json
vendored
Normal file
@ -0,0 +1,5 @@
|
||||
{
|
||||
"rmse": [
|
||||
0.8268900471469541
|
||||
]
|
||||
}
|
||||
1521
data/external/all_amine_split_for_LiON/cv_4/preds.csv
vendored
Normal file
1521
data/external/all_amine_split_for_LiON/cv_4/preds.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1521
data/external/all_amine_split_for_LiON/cv_4/test.csv
vendored
Normal file
1521
data/external/all_amine_split_for_LiON/cv_4/test.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1521
data/external/all_amine_split_for_LiON/cv_4/test_extra_x.csv
vendored
Normal file
1521
data/external/all_amine_split_for_LiON/cv_4/test_extra_x.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1521
data/external/all_amine_split_for_LiON/cv_4/test_metadata.csv
vendored
Normal file
1521
data/external/all_amine_split_for_LiON/cv_4/test_metadata.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
2
data/external/all_amine_split_for_LiON/cv_4/test_scores.csv
vendored
Normal file
2
data/external/all_amine_split_for_LiON/cv_4/test_scores.csv
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
Task,Mean rmse,Standard deviation rmse,Fold 0 rmse
|
||||
quantified_delivery,0.8268900471469541,0.0,0.8268900471469541
|
||||
|
1521
data/external/all_amine_split_for_LiON/cv_4/test_weights.csv
vendored
Normal file
1521
data/external/all_amine_split_for_LiON/cv_4/test_weights.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
6081
data/external/all_amine_split_for_LiON/cv_4/train.csv
vendored
Normal file
6081
data/external/all_amine_split_for_LiON/cv_4/train.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
6081
data/external/all_amine_split_for_LiON/cv_4/train_extra_x.csv
vendored
Normal file
6081
data/external/all_amine_split_for_LiON/cv_4/train_extra_x.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
6081
data/external/all_amine_split_for_LiON/cv_4/train_metadata.csv
vendored
Normal file
6081
data/external/all_amine_split_for_LiON/cv_4/train_metadata.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
6081
data/external/all_amine_split_for_LiON/cv_4/train_weights.csv
vendored
Normal file
6081
data/external/all_amine_split_for_LiON/cv_4/train_weights.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
2038
data/external/all_amine_split_for_LiON/cv_4/valid.csv
vendored
Normal file
2038
data/external/all_amine_split_for_LiON/cv_4/valid.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
2038
data/external/all_amine_split_for_LiON/cv_4/valid_extra_x.csv
vendored
Normal file
2038
data/external/all_amine_split_for_LiON/cv_4/valid_extra_x.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
2038
data/external/all_amine_split_for_LiON/cv_4/valid_metadata.csv
vendored
Normal file
2038
data/external/all_amine_split_for_LiON/cv_4/valid_metadata.csv
vendored
Normal file
File diff suppressed because it is too large
Load Diff
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user