Compare commits

..

22 Commits

Author SHA1 Message Date
RYDE
5730490642
Merge pull request #1 from RYDE-WORK/amine_split
Amine split
2026-02-11 16:55:20 +08:00
RYDE-WORK
0b90caef1d Resolve conflicts 2026-02-11 16:51:21 +08:00
RYDE-WORK
a9392aa780 Update models and UI 2026-02-11 16:49:28 +08:00
RYDE-WORK
3f33f9d233 Add lfs 2026-02-11 16:45:21 +08:00
RYDE-WORK
c225fc67a7 Dedicated docker-compose.yml for gpu deployment 2026-01-26 11:11:52 +08:00
RYDE-WORK
3cce4c9373 Dockerize 2026-01-26 11:08:57 +08:00
RYDE-WORK
68119df128 Update app.py 2026-01-26 10:33:50 +08:00
RYDE-WORK
75e1dcb0eb Add UI 2026-01-26 00:09:21 +08:00
RYDE-WORK
982e98cced Add LNP fomular optimization 2026-01-25 23:54:20 +08:00
RYDE-WORK
39a14e4274 Add final models 2026-01-25 19:19:29 +08:00
RYDE-WORK
93a6f8654d ... 2026-01-23 17:51:08 +08:00
RYDE-WORK
a56637c8ac Add loss visualization 2026-01-23 13:40:22 +08:00
RYDE-WORK
871afc5988 Add random CV split 2026-01-23 10:12:25 +08:00
RYDE-WORK
ac4246c2b7 Add train_cv(without pretrain) 2026-01-22 18:06:13 +08:00
RYDE-WORK
47bbb64c66 Add more metrics 2026-01-22 17:06:24 +08:00
RYDE-WORK
039be54c5a ... 2026-01-22 01:01:29 +08:00
RYDE-WORK
e6a5e5495a ... 2026-01-22 00:24:13 +08:00
RYDE-WORK
e123fc8f3e update pretrain ratio 2026-01-21 23:36:53 +08:00
RYDE-WORK
c392b48994 Add CV results 2026-01-21 22:57:44 +08:00
RYDE-WORK
e1c85c83ba CV results 2026-01-21 20:10:48 +08:00
RYDE-WORK
a2bfb26dfc Add CV 2026-01-21 19:35:55 +08:00
RYDE-WORK
6773929ea2 增加LiON的评估指标 2026-01-21 16:20:10 +08:00
247 changed files with 218554 additions and 2142 deletions

75
.dockerignore Normal file
View File

@ -0,0 +1,75 @@
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
*.egg-info/
.eggs/
dist/
build/
*.egg
# Virtual environments
.venv/
venv/
ENV/
env/
.pixi/
# IDE
.idea/
.vscode/
*.swp
*.swo
.cursor/
# Git
.git/
.gitignore
# Data (不需要打包到镜像)
data/
!data/.gitkeep
# Notebooks
notebooks/
*.ipynb
# Documentation
docs/
# Reports
reports/
# References
references/
# Scripts (训练脚本不需要)
scripts/
# Lock files
pixi.lock
# Tests
tests/
.pytest_cache/
# Logs
*.log
logs/
# Temporary files
*.tmp
*.temp
.DS_Store
# Models (will be mounted as volume or copied explicitly)
# Note: models/final/ is copied in Dockerfile
models/finetune_cv/
models/pretrain_cv/
models/mpnn/
models/*.pt
models/*.json
!models/final/

1
.gitattributes vendored Normal file
View File

@ -0,0 +1 @@
*.pt filter=lfs diff=lfs merge=lfs -text

63
Dockerfile Normal file
View File

@ -0,0 +1,63 @@
# LNP-ML Docker Image
# 多阶段构建,支持 API 和 Streamlit 两种服务
FROM python:3.8-slim AS base
# 设置环境变量
ENV PYTHONUNBUFFERED=1 \
PYTHONDONTWRITEBYTECODE=1 \
PIP_NO_CACHE_DIR=1 \
PIP_DISABLE_PIP_VERSION_CHECK=1
WORKDIR /app
# 安装系统依赖
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
libxrender1 \
libxext6 \
curl \
&& rm -rf /var/lib/apt/lists/*
# 复制依赖文件
COPY requirements.txt .
# 安装 Python 依赖
RUN pip install --upgrade pip && \
pip install -r requirements.txt
# 复制项目代码
COPY pyproject.toml .
COPY README.md .
COPY LICENSE .
COPY lnp_ml/ ./lnp_ml/
COPY app/ ./app/
# 安装项目包
RUN pip install -e .
# 复制模型文件
COPY models/final/ ./models/final/
# ============ API 服务 ============
FROM base AS api
EXPOSE 8000
ENV MODEL_PATH=/app/models/final/model.pt
CMD ["uvicorn", "app.api:app", "--host", "0.0.0.0", "--port", "8000"]
# ============ Streamlit 服务 ============
FROM base AS streamlit
EXPOSE 8501
# Streamlit 配置
ENV STREAMLIT_SERVER_PORT=8501 \
STREAMLIT_SERVER_ADDRESS=0.0.0.0 \
STREAMLIT_SERVER_HEADLESS=true \
STREAMLIT_BROWSER_GATHER_USAGE_STATS=false
CMD ["streamlit", "run", "app/app.py"]

179
Makefile
View File

@ -68,11 +68,29 @@ clean_data: requirements
data: requirements
$(PYTHON_INTERPRETER) scripts/process_data.py
## Process dataset for final training (interim -> processed/final, train:val=9:1, no test)
.PHONY: data_final
data_final: requirements
$(PYTHON_INTERPRETER) scripts/process_data_final.py
## Process external data for pretrain (external -> processed)
.PHONY: data_pretrain
data_pretrain: requirements
$(PYTHON_INTERPRETER) scripts/process_external.py
## Process CV data for cross-validation pretrain (external/all_amine_split_for_LiON -> processed/cv)
.PHONY: data_pretrain_cv
data_pretrain_cv: requirements
$(PYTHON_INTERPRETER) scripts/process_external_cv.py
## Process internal data with CV splitting (interim -> processed/cv)
## Use SCAFFOLD_SPLIT=1 to enable amine-based scaffold splitting (default: random shuffle)
SCAFFOLD_SPLIT_FLAG = $(if $(filter 1,$(SCAFFOLD_SPLIT)),--scaffold-split,)
.PHONY: data_cv
data_cv: requirements
$(PYTHON_INTERPRETER) scripts/process_data_cv.py $(SCAFFOLD_SPLIT_FLAG)
# MPNN 支持:使用 USE_MPNN=1 启用 MPNN encoder
# 例如make pretrain USE_MPNN=1
MPNN_FLAG = $(if $(USE_MPNN),--use-mpnn,)
@ -81,40 +99,185 @@ MPNN_FLAG = $(if $(USE_MPNN),--use-mpnn,)
# 例如make finetune FREEZE_BACKBONE=1
FREEZE_FLAG = $(if $(FREEZE_BACKBONE),--freeze-backbone,)
# 设备选择:使用 DEVICE=xxx 指定设备
# 例如make train DEVICE=cuda:0 或 make test_cv DEVICE=mps
DEVICE_FLAG = $(if $(DEVICE),--device $(DEVICE),)
## Pretrain on external data (delivery only)
.PHONY: pretrain
pretrain: requirements
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.pretrain main $(MPNN_FLAG)
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.pretrain main $(MPNN_FLAG) $(DEVICE_FLAG)
## Evaluate pretrain model (delivery metrics)
.PHONY: test_pretrain
test_pretrain: requirements
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.pretrain test $(MPNN_FLAG)
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.pretrain test $(MPNN_FLAG) $(DEVICE_FLAG)
## Pretrain with cross-validation (5-fold)
.PHONY: pretrain_cv
pretrain_cv: requirements
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.pretrain_cv main $(MPNN_FLAG) $(DEVICE_FLAG)
## Evaluate CV pretrain models on test sets (auto-detects MPNN from checkpoint)
.PHONY: test_pretrain_cv
test_pretrain_cv: requirements
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.pretrain_cv test $(DEVICE_FLAG)
## Train model (multi-task, from scratch)
.PHONY: train
train: requirements
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.train $(MPNN_FLAG)
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.train $(MPNN_FLAG) $(DEVICE_FLAG)
## Finetune from pretrained checkpoint (use FREEZE_BACKBONE=1 to freeze backbone)
.PHONY: finetune
finetune: requirements
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.train --init-from-pretrain models/pretrain_delivery.pt $(FREEZE_FLAG) $(MPNN_FLAG)
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.train --init-from-pretrain models/pretrain_delivery.pt $(FREEZE_FLAG) $(MPNN_FLAG) $(DEVICE_FLAG)
## Final training using all data (train:val=9:1, no test set), with pretrained weights
.PHONY: train_final
train_final: requirements
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.train \
--train-path data/processed/final/train.parquet \
--val-path data/processed/final/val.parquet \
--output-dir models/final \
--init-from-pretrain models/pretrain_delivery.pt \
$(FREEZE_FLAG) $(MPNN_FLAG) $(DEVICE_FLAG)
## Finetune with cross-validation on internal data (5-fold, amine-based split) with pretrained weights
.PHONY: finetune_cv
finetune_cv: requirements
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.train_cv main --init-from-pretrain models/pretrain_delivery.pt $(FREEZE_FLAG) $(MPNN_FLAG) $(DEVICE_FLAG)
## Train with cross-validation on internal data only (5-fold, amine-based split)
.PHONY: train_cv
train_cv: requirements
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.train_cv main $(FREEZE_FLAG) $(MPNN_FLAG) $(DEVICE_FLAG)
## Evaluate CV finetuned models on test sets (auto-detects MPNN from checkpoint)
.PHONY: test_cv
test_cv: requirements
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.train_cv test $(DEVICE_FLAG)
## Train with hyperparameter tuning
.PHONY: tune
tune: requirements
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.train --tune $(MPNN_FLAG)
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.train --tune $(MPNN_FLAG) $(DEVICE_FLAG)
# ============ 嵌套 CV + Optuna 调参StratifiedKFold + 类权重) ============
# 通用参数:
# SEED: 随机种子 (默认: 42)
# N_TRIALS: Optuna 试验数 (默认: 20)
# EPOCHS_PER_TRIAL: 每个试验的最大 epoch (默认: 30)
# MIN_STRATUM_COUNT: 复合分层标签的最小样本数 (默认: 5)
# OUTPUT_DIR: 输出目录 (根据命令有不同默认值)
# INIT_PRETRAIN: 预训练权重路径 (默认: models/pretrain_delivery.pt)
SEED_FLAG = $(if $(SEED),--seed $(SEED),)
N_TRIALS_FLAG = $(if $(N_TRIALS),--n-trials $(N_TRIALS),)
EPOCHS_PER_TRIAL_FLAG = $(if $(EPOCHS_PER_TRIAL),--epochs-per-trial $(EPOCHS_PER_TRIAL),)
MIN_STRATUM_FLAG = $(if $(MIN_STRATUM_COUNT),--min-stratum-count $(MIN_STRATUM_COUNT),)
OUTPUT_DIR_FLAG = $(if $(OUTPUT_DIR),--output-dir $(OUTPUT_DIR),)
USE_SWA_FLAG = $(if $(USE_SWA),--use-swa,)
# 默认使用预训练权重,设置 NO_PRETRAIN=1 可禁用
INIT_PRETRAIN_FLAG = $(if $(NO_PRETRAIN),,--init-from-pretrain $(or $(INIT_PRETRAIN),models/pretrain_delivery.pt))
## Nested CV with Optuna: outer 5-fold (test) + inner 3-fold (tune)
## 用于模型评估:外层 5-fold 产生无偏性能估计,内层 3-fold 做超参搜索
## 默认加载 models/pretrain_delivery.pt 预训练权重,使用 NO_PRETRAIN=1 禁用
## 使用示例: make nested_cv_tune DEVICE=cuda N_TRIALS=30
.PHONY: nested_cv_tune
nested_cv_tune: requirements
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.nested_cv_optuna \
$(DEVICE_FLAG) $(MPNN_FLAG) $(SEED_FLAG) $(INIT_PRETRAIN_FLAG) \
$(N_TRIALS_FLAG) $(EPOCHS_PER_TRIAL_FLAG) $(MIN_STRATUM_FLAG) $(OUTPUT_DIR_FLAG)
## Final training with Optuna: 3-fold CV tune + full data train
## 用于最终模型训练3-fold 调参后用全量数据训练(无 early-stop
## 默认加载 models/pretrain_delivery.pt 预训练权重,使用 NO_PRETRAIN=1 禁用
## 使用示例: make final_optuna DEVICE=cuda N_TRIALS=30 USE_SWA=1
.PHONY: final_optuna
final_optuna: requirements
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.final_train_optuna_cv \
$(DEVICE_FLAG) $(MPNN_FLAG) $(SEED_FLAG) $(INIT_PRETRAIN_FLAG) \
$(N_TRIALS_FLAG) $(EPOCHS_PER_TRIAL_FLAG) $(MIN_STRATUM_FLAG) $(OUTPUT_DIR_FLAG) $(USE_SWA_FLAG)
## Run predictions
.PHONY: predict
predict: requirements
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.predict
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.predict $(DEVICE_FLAG)
## Test model on test set (with detailed metrics)
## Test model on test set (with detailed metrics, auto-detects MPNN from checkpoint)
.PHONY: test
test: requirements
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.predict test
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.predict test $(DEVICE_FLAG)
## Formulation optimization: find optimal LNP formulation for target organ
## Usage: make optimize SMILES="CC(C)..." ORGAN=liver
.PHONY: optimize
optimize: requirements
$(PYTHON_INTERPRETER) -m app.optimize --smiles "$(SMILES)" --organ $(ORGAN) $(DEVICE_FLAG)
## Start FastAPI backend server (port 8000)
.PHONY: api
api: requirements
uvicorn app.api:app --host 0.0.0.0 --port 8000 --reload
## Start Streamlit frontend app (port 8501)
.PHONY: webapp
webapp: requirements
streamlit run app/app.py --server.port 8501
## Start both API and webapp (run in separate terminals)
.PHONY: serve
serve:
@echo "请在两个终端分别运行:"
@echo " 终端 1: make api"
@echo " 终端 2: make webapp"
@echo ""
@echo "然后访问: http://localhost:8501"
#################################################################################
# DOCKER COMMANDS #
#################################################################################
## Build Docker images
.PHONY: docker-build
docker-build:
docker compose build
## Start all services with Docker Compose
.PHONY: docker-up
docker-up:
docker compose up -d
## Stop all Docker services
.PHONY: docker-down
docker-down:
docker compose down
## View Docker logs
.PHONY: docker-logs
docker-logs:
docker compose logs -f
## Build and start all services
.PHONY: docker-serve
docker-serve: docker-build docker-up
@echo ""
@echo "🚀 服务已启动!"
@echo " - API: http://localhost:8000"
@echo " - Web 应用: http://localhost:8501"
@echo ""
@echo "查看日志: make docker-logs"
@echo "停止服务: make docker-down"
## Clean Docker resources (images, volumes, etc.)
.PHONY: docker-clean
docker-clean:
docker compose down -v --rmi local
docker system prune -f
#################################################################################

View File

@ -156,5 +156,12 @@ python -m lnp_ml.modeling.train \
└── plots.py <- Code to create visualizations
```
### 配方筛选
```
make optimize SMILES="CC(C)NCCNC(C)C" ORGAN=liver
```
--------

62
app/PARAM.md Normal file
View File

@ -0,0 +1,62 @@
## Possible Values
# comp token([B, 5], the sum of the latter four ratios is always 1)
Cationic_Lipid_to_mRNA_weight_ratio(float, Min: 0.05, Max: 0.3, Step Size: 0.01)
Cationic_Lipid_Mol_Ratio(float, Min: 0.05, Max: 0.8, Step Size: 0.01)
Phospholipid_Mol_Ratio(float, Min: 0, Max: 0.8, Step Size: 0.01)
Cholesterol_Mol_Ratio(float, Min: 0, Max: 0.8, Step Size: 0.01)
PEG_Lipid_Mol_Ratio(float, Min: 0, Max: 0.05, Step Size: 0.01)
# phys token([B, 12])
Purity_Pure(one-hot for Purity, always Pure)
Purity_Crude(one-hot for Purity, always Pure)
Mix_type_Microfluidic(one-hot for Mix_type, always Microfluidic)
Mix_type_Microfluidic(one-hot for Mix_type, always Microfluidic)
Cargo_type_mRNA(one-hot for Cargo_type, always mRNA)
Cargo_type_pDNA(one-hot for Cargo_type, always mRNA)
Cargo_type_siRNA(one-hot for Cargo_type, always mRNA)
Target_or_delivered_gene_FFL(one-hot for Target_or_delivered_gene, always FFL)
Target_or_delivered_gene_Peptide_barcode(one-hot for Target_or_delivered_gene, always FFL)
Target_or_delivered_gene_hEPO(one-hot for Target_or_delivered_gene, always FFL)
Target_or_delivered_gene_FVII(one-hot for Target_or_delivered_gene, always FFL)
Target_or_delivered_gene_GFP(one-hot for Target_or_delivered_gene, always FFL)
# help token([B, 4])
Helper_lipid_ID_DOPE(one-hot for Helper_lipid_ID, one of {DOPE, DSPC, DOTAP})
Helper_lipid_ID_DOTAP(one-hot for Helper_lipid_ID, one of {DOPE, DSPC, DOTAP})
Helper_lipid_ID_DSPC(one-hot for Helper_lipid_ID, one of {DOPE, DSPC, DOTAP})
Helper_lipid_ID_MDOA(one-hot for Helper_lipid_ID, one of {DOPE, DSPC, DOTAP})
# exp token([B, 32])
Model_type_A549(one-hot for Model_type, always Mouse)
Model_type_BDMC(one-hot for Model_type, always Mouse)
Model_type_BMDM(one-hot for Model_type, always Mouse)
Model_type_HBEC_ALI(one-hot for Model_type, always Mouse)
Model_type_HEK293T(one-hot for Model_type, always Mouse)
Model_type_HeLa(one-hot for Model_type, always Mouse)
Model_type_IGROV1(one-hot for Model_type, always Mouse)
Model_type_Mouse(one-hot for Model_type, always Mouse)
Model_type_RAW264p7(one-hot for Model_type, always Mouse)
Delivery_target_dendritic_cell(one-hot for Delivery_target, always body)
Delivery_target_generic_cell(one-hot for Delivery_target, always body)
Delivery_target_liver(one-hot for Delivery_target, always body)
Delivery_target_lung(one-hot for Delivery_target, always body)
Delivery_target_lung_epithelium(one-hot for Delivery_target, always body)
Delivery_target_macrophage(one-hot for Delivery_target, always body)
Delivery_target_muscle(one-hot for Delivery_target, always body)
Delivery_target_spleen(one-hot for Delivery_target, always body)
Delivery_target_body(one-hot for Delivery_target, always body)
Route_of_administration_in_vitro(one-hot for Route_of_administration, one of {Intravenous, Intramuscular})
Route_of_administration_intravenous(one-hot for Route_of_administration, one of {Intravenous, Intramuscular})
Route_of_administration_intramuscular(one-hot for Route_of_administration, one of {Intravenous, Intramuscular})
Route_of_administration_intratracheal(one-hot for Route_of_administration, one of {Intravenous, Intramuscular})
Sample_organization_type_individual(one-hot for Sample_organization_type, always Individual)
Sample_organization_type_barcoded(one-hot for Sample_organization_type, always Individual)
Value_name_log_luminescence(one-hot for Value_name, always luminescence)
Value_name_luminescence(one-hot for Value_name, always luminescence)
Value_name_FFL_silencing(one-hot for Value_name, always luminescence)
Value_name_Peptide_abundance(one-hot for Value_name, always luminescence)
Value_name_hEPO(one-hot for Value_name, always luminescence)
Value_name_FVII_silencing(one-hot for Value_name, always luminescence)
Value_name_GFP_delivery(one-hot for Value_name, always luminescence)
Value_name_Discretized_luminescence(one-hot for Value_name, always luminescence)

15
app/SCORE.md Normal file
View File

@ -0,0 +1,15 @@
## regression
biodistribution(selected organ only): score = y * weight, where weight=0.3
quantified_delivery: score = (y-min)/(max-min)*weight, where weight=0.25, (min=-0.798559291, max=4.497814051056962) when route_of_administration=intravenous, (min=-0.794912427, max=10.220042980012716) when route_of_administration=intramuscular
size: score = 0 * weight if y<60, 1 * weight if 60<=y<=150, 0 * weight if y>150, where weight=0.05
## classification
encapsulation_efficiency_0: score = weight, where weight=0
encapsulation_efficiency_1: score = weight, where weight=0.02
encapsulation_efficiency_2: score = weight, where weight=0.08
pdi_0: score = weight, where weight=0.08
pdi_1: score = weight, where weight=0.02
pdi_2: score = weight, where weight=0
pdi_3: score = weight, where weight=0
toxicity_0: score=weight, where weight=0.2
toxicity_1: score=weight, where weight=0

2
app/__init__.py Normal file
View File

@ -0,0 +1,2 @@
"""LNP 配方优化应用"""

361
app/api.py Normal file
View File

@ -0,0 +1,361 @@
"""
FastAPI 配方优化 API
启动服务:
uvicorn app.api:app --host 0.0.0.0 --port 8000 --reload
"""
import os
from pathlib import Path
from typing import List, Dict, Optional
from contextlib import asynccontextmanager
import torch
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field
from loguru import logger
from lnp_ml.config import MODELS_DIR
from lnp_ml.modeling.predict import load_model
from app.optimize import (
optimize,
format_results,
AVAILABLE_ORGANS,
TARGET_BIODIST,
CompRanges,
ScoringWeights,
)
# ============ Pydantic Models ============
class CompRangesRequest(BaseModel):
    """组分范围配置"""
    # Min/max search bounds for each formulation component. The ge/le
    # constraints only bound each field individually; cross-field consistency
    # (e.g. min <= max) is checked later via CompRanges.get_validation_error()
    # in the /optimize handler.
    weight_ratio_min: float = Field(default=0.05, ge=0.01, le=0.50, description="阳离子脂质/mRNA 重量比最小值")
    weight_ratio_max: float = Field(default=0.30, ge=0.01, le=0.50, description="阳离子脂质/mRNA 重量比最大值")
    cationic_mol_min: float = Field(default=0.05, ge=0.00, le=1.00, description="阳离子脂质 mol 比例最小值")
    cationic_mol_max: float = Field(default=0.80, ge=0.00, le=1.00, description="阳离子脂质 mol 比例最大值")
    phospholipid_mol_min: float = Field(default=0.00, ge=0.00, le=1.00, description="磷脂 mol 比例最小值")
    phospholipid_mol_max: float = Field(default=0.80, ge=0.00, le=1.00, description="磷脂 mol 比例最大值")
    cholesterol_mol_min: float = Field(default=0.00, ge=0.00, le=1.00, description="胆固醇 mol 比例最小值")
    cholesterol_mol_max: float = Field(default=0.80, ge=0.00, le=1.00, description="胆固醇 mol 比例最大值")
    peg_mol_min: float = Field(default=0.00, ge=0.00, le=0.20, description="PEG 脂质 mol 比例最小值")
    peg_mol_max: float = Field(default=0.05, ge=0.00, le=0.20, description="PEG 脂质 mol 比例最大值")

    def to_comp_ranges(self) -> CompRanges:
        """转换为 CompRanges 对象"""
        # Field-for-field copy into the internal CompRanges object used by the optimizer.
        return CompRanges(
            weight_ratio_min=self.weight_ratio_min,
            weight_ratio_max=self.weight_ratio_max,
            cationic_mol_min=self.cationic_mol_min,
            cationic_mol_max=self.cationic_mol_max,
            phospholipid_mol_min=self.phospholipid_mol_min,
            phospholipid_mol_max=self.phospholipid_mol_max,
            cholesterol_mol_min=self.cholesterol_mol_min,
            cholesterol_mol_max=self.cholesterol_mol_max,
            peg_mol_min=self.peg_mol_min,
            peg_mol_max=self.peg_mol_max,
        )
class ScoringWeightsRequest(BaseModel):
    """评分权重配置"""
    # Weights for the composite formulation score; the defaults rank purely by
    # target-organ biodistribution (every other weight is zero).
    biodist_weight: float = Field(default=1.0, ge=0.0, description="目标器官分布权重")
    delivery_weight: float = Field(default=0.0, ge=0.0, description="量化递送权重")
    # NOTE(review): description says 80-150nm, but app/SCORE.md scores sizes in
    # the 60-150nm band — confirm which range the scorer actually uses.
    size_weight: float = Field(default=0.0, ge=0.0, description="粒径权重 (80-150nm)")
    ee_class_weights: List[float] = Field(default=[0.0, 0.0, 0.0], description="EE 分类权重 [class0, class1, class2]")
    pdi_class_weights: List[float] = Field(default=[0.0, 0.0, 0.0, 0.0], description="PDI 分类权重 [class0, class1, class2, class3]")
    toxic_class_weights: List[float] = Field(default=[0.0, 0.0], description="毒性分类权重 [无毒, 有毒]")

    def to_scoring_weights(self) -> ScoringWeights:
        """转换为 ScoringWeights 对象"""
        # Field-for-field copy into the internal ScoringWeights object.
        return ScoringWeights(
            biodist_weight=self.biodist_weight,
            delivery_weight=self.delivery_weight,
            size_weight=self.size_weight,
            ee_class_weights=self.ee_class_weights,
            pdi_class_weights=self.pdi_class_weights,
            toxic_class_weights=self.toxic_class_weights,
        )
class OptimizeRequest(BaseModel):
    """优化请求"""
    # Required inputs.
    smiles: str = Field(..., description="Cationic lipid SMILES string")
    organ: str = Field(..., description="Target organ for optimization")
    # Search-strategy knobs; None means "let the optimizer apply its default".
    top_k: int = Field(default=20, ge=1, le=100, description="Number of top formulations to return")
    num_seeds: Optional[int] = Field(default=None, ge=1, le=500, description="Number of seed points from first iteration (default: top_k * 5)")
    top_per_seed: int = Field(default=1, ge=1, le=10, description="Number of local best to keep per seed in refinement")
    step_sizes: Optional[List[float]] = Field(default=None, description="Step sizes for each iteration (default: [0.10, 0.02, 0.01])")
    comp_ranges: Optional[CompRangesRequest] = Field(default=None, description="组分范围配置(默认使用标准范围)")
    routes: Optional[List[str]] = Field(default=None, description="给药途径列表 (default: ['intravenous', 'intramuscular'])")
    scoring_weights: Optional[ScoringWeightsRequest] = Field(default=None, description="评分权重配置(默认仅按 biodist 排序)")

    class Config:
        # Example payload surfaced in the OpenAPI /docs UI.
        json_schema_extra = {
            "example": {
                "smiles": "CC(C)NCCNC(C)C",
                "organ": "liver",
                "top_k": 20,
                "num_seeds": None,
                "top_per_seed": 1,
                "step_sizes": None,
                "comp_ranges": None,
                "routes": None,
                "scoring_weights": None
            }
        }
class FormulationResult(BaseModel):
    """单个配方结果"""
    rank: int  # 1-based position in the ranked result list
    target_biodist: float  # predicted biodistribution for the requested organ
    composite_score: Optional[float] = None  # weighted composite score
    # Formulation composition.
    cationic_lipid_to_mrna_ratio: float
    cationic_lipid_mol_ratio: float
    phospholipid_mol_ratio: float
    cholesterol_mol_ratio: float
    peg_lipid_mol_ratio: float
    helper_lipid: str
    route: str
    # Predicted biodistribution for every organ, keyed by organ name.
    all_biodist: Dict[str, float]
    # Optional auxiliary predictions.
    quantified_delivery: Optional[float] = None
    size: Optional[float] = None
    pdi_class: Optional[int] = None  # PDI class (0: <0.2, 1: 0.2-0.3, 2: 0.3-0.4, 3: >0.4)
    # NOTE(review): EE thresholds here (<80%, 80-90%, >90%) disagree with
    # app/app.py's EE_CLASS_LABELS (<50%, 50-80%, >80%) — confirm the model's binning.
    ee_class: Optional[int] = None  # EE class (0: <80%, 1: 80-90%, 2: >90%)
    toxic_class: Optional[int] = None  # toxicity class (0: non-toxic, 1: toxic)
class OptimizeResponse(BaseModel):
    """优化响应"""
    smiles: str  # the SMILES echoed back from the request
    target_organ: str  # the organ that was optimized for
    formulations: List[FormulationResult]  # ranked best-first
    message: str  # human-readable summary
class HealthResponse(BaseModel):
    """健康检查响应"""
    status: str  # "healthy" or "model_not_loaded"
    model_loaded: bool
    device: str  # string form of the torch device in use
    available_organs: List[str]
# ============ Global State ============
class ModelState:
    """模型状态管理"""
    # Class-level slots populated once by the lifespan hook at startup.
    model = None  # loaded model, None until startup completes
    device = None  # torch.device used for inference
    model_path = None  # Path the model was loaded from

# Module-level singleton shared by all request handlers.
state = ModelState()
# ============ Lifespan ============
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan: load the model at startup, release it at shutdown.

    Device resolution order is CUDA -> Apple MPS -> CPU, overridable with the
    DEVICE environment variable. The model path defaults to
    MODELS_DIR/final/model.pt and can be overridden with MODEL_PATH.
    Raises whatever load_model raises so a broken deployment fails fast.
    """
    # ---- Startup ----
    logger.info("Starting API server...")
    # Pick the best available accelerator.
    if torch.cuda.is_available():
        device_str = "cuda"
    elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
        device_str = "mps"
    else:
        device_str = "cpu"
    # Environment variable takes precedence over auto-detection.
    device_str = os.environ.get("DEVICE", device_str)
    state.device = torch.device(device_str)
    logger.info(f"Using device: {state.device}")
    # Load the model once into process-global state.
    model_path = Path(os.environ.get("MODEL_PATH", MODELS_DIR / "final" / "model.pt"))
    state.model_path = model_path
    logger.info(f"Loading model from {model_path}...")
    try:
        state.model = load_model(model_path, state.device)
        logger.success("Model loaded successfully!")
    except Exception as e:
        # Fail fast: without a model the API cannot serve requests.
        logger.error(f"Failed to load model: {e}")
        raise
    yield
    # ---- Shutdown ----
    logger.info("Shutting down API server...")
    state.model = None
    # Fix: the original used a conditional *expression* purely for its side
    # effect (`torch.cuda.empty_cache() if torch.cuda.is_available() else None`);
    # a plain guarded statement is the idiomatic form.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
# ============ FastAPI App ============
app = FastAPI(
    title="LNP 配方优化 API",
    description="基于深度学习的 LNP 纳米颗粒配方优化服务",
    version="1.0.0",
    lifespan=lifespan,
)
# CORS: wide open for development convenience.
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is
# rejected by browsers for credentialed requests and is unsafe in production —
# consider restricting origins to the web-app host.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# ============ Endpoints ============
@app.get("/", response_model=HealthResponse)
async def health_check():
    """健康检查"""
    # Report model readiness, the active device, and the selectable organs.
    return HealthResponse(
        status="healthy" if state.model is not None else "model_not_loaded",
        model_loaded=state.model is not None,
        device=str(state.device),
        available_organs=AVAILABLE_ORGANS,
    )
@app.get("/organs", response_model=List[str])
async def get_available_organs():
    """获取可用的目标器官列表"""
    # Static list imported from app.optimize.
    return AVAILABLE_ORGANS
@app.post("/optimize", response_model=OptimizeResponse)
async def optimize_formulation(request: OptimizeRequest):
    """
    执行配方优化
    通过迭代式 Grid Search 寻找最大化目标器官 Biodistribution 的最优配方
    """
    # Reject requests until the lifespan hook has loaded the model (503).
    if state.model is None:
        raise HTTPException(status_code=503, detail="Model not loaded")
    # Organ must be one of the supported optimization targets.
    if request.organ not in AVAILABLE_ORGANS:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid organ: {request.organ}. Available: {AVAILABLE_ORGANS}"
        )
    # SMILES must be non-empty; chemical validity is checked downstream.
    if not request.smiles or len(request.smiles.strip()) == 0:
        raise HTTPException(status_code=400, detail="SMILES string cannot be empty")
    # Routes, when supplied, must be a non-empty subset of the supported routes.
    valid_routes = ["intravenous", "intramuscular"]
    if request.routes is not None:
        for r in request.routes:
            if r not in valid_routes:
                raise HTTPException(
                    status_code=400,
                    detail=f"Invalid route: {r}. Available: {valid_routes}"
                )
        if len(request.routes) == 0:
            raise HTTPException(status_code=400, detail="At least one route must be specified")
    logger.info(f"Optimization request: organ={request.organ}, routes={request.routes}, smiles={request.smiles[:50]}...")
    # Build the component-range config. Validated OUTSIDE the try block below so
    # invalid user input yields a 400, not a wrapped 500.
    comp_ranges = None
    if request.comp_ranges is not None:
        comp_ranges = request.comp_ranges.to_comp_ranges()
        # Cross-field sanity check (individual bounds are already enforced by pydantic).
        validation_error = comp_ranges.get_validation_error()
        if validation_error:
            raise HTTPException(
                status_code=400,
                detail=f"组分范围配置无效: {validation_error}"
            )
    # Optional scoring-weight overrides (default ranks by biodistribution only).
    scoring_weights = None
    if request.scoring_weights is not None:
        scoring_weights = request.scoring_weights.to_scoring_weights()
    try:
        # Run the hierarchical (coarse-to-fine) grid search.
        results = optimize(
            smiles=request.smiles,
            organ=request.organ,
            model=state.model,
            device=state.device,
            top_k=request.top_k,
            num_seeds=request.num_seeds,
            top_per_seed=request.top_per_seed,
            step_sizes=request.step_sizes,
            comp_ranges=comp_ranges,
            routes=request.routes,
            scoring_weights=scoring_weights,
            batch_size=256,
        )
        # Weights used to report the composite score on each result row.
        from app.optimize import compute_formulation_score, DEFAULT_SCORING_WEIGHTS
        actual_scoring_weights = scoring_weights if scoring_weights is not None else DEFAULT_SCORING_WEIGHTS
        # Convert internal result objects into the response schema.
        formulations = []
        for i, f in enumerate(results):
            formulations.append(FormulationResult(
                rank=i + 1,
                target_biodist=f.get_biodist(request.organ),
                composite_score=compute_formulation_score(f, request.organ, actual_scoring_weights),
                cationic_lipid_to_mrna_ratio=f.cationic_lipid_to_mrna_ratio,
                cationic_lipid_mol_ratio=f.cationic_lipid_mol_ratio,
                phospholipid_mol_ratio=f.phospholipid_mol_ratio,
                cholesterol_mol_ratio=f.cholesterol_mol_ratio,
                peg_lipid_mol_ratio=f.peg_lipid_mol_ratio,
                helper_lipid=f.helper_lipid,
                route=f.route,
                # Strip the "Biodistribution_" column prefix for the response keys.
                all_biodist={
                    col.replace("Biodistribution_", ""): f.biodist_predictions.get(col, 0.0)
                    for col in TARGET_BIODIST
                },
                # Auxiliary predictions.
                quantified_delivery=f.quantified_delivery,
                size=f.size,
                pdi_class=f.pdi_class,
                ee_class=f.ee_class,
                toxic_class=f.toxic_class,
            ))
        logger.success(f"Optimization completed: {len(formulations)} formulations")
        return OptimizeResponse(
            smiles=request.smiles,
            target_organ=request.organ,
            formulations=formulations,
            message=f"Successfully found top {len(formulations)} formulations for {request.organ}",
        )
    except Exception as e:
        # Any failure inside the search is surfaced as a 500 with its message.
        logger.error(f"Optimization failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
if __name__ == "__main__":
    # Dev entry point; production runs via the uvicorn CLI / Docker CMD instead.
    import uvicorn

    uvicorn.run(
        "app.api:app",
        host="0.0.0.0",
        port=8000,
        reload=True,
    )

770
app/app.py Normal file
View File

@ -0,0 +1,770 @@
"""
Streamlit 配方优化交互界面
启动应用:
streamlit run app/app.py
Docker 环境变量:
API_URL: API 服务地址 (默认: http://localhost:8000)
"""
import io
import os
from datetime import datetime
import httpx
import pandas as pd
import streamlit as st
# ============ Configuration ============
# Backend API base URL; read from the environment so the Docker setup can
# point the UI at the api container.
API_URL = os.environ.get("API_URL", "http://localhost:8000")
# Organs selectable in the UI.
# NOTE(review): duplicated here instead of fetched from the API's /organs
# endpoint — keep the two lists in sync.
AVAILABLE_ORGANS = [
    "liver",
    "spleen",
    "lung",
    "heart",
    "kidney",
    "muscle",
    "lymph_nodes",
]
# Display labels (Chinese + English) keyed by organ id.
ORGAN_LABELS = {
    "liver": "肝脏 (Liver)",
    "spleen": "脾脏 (Spleen)",
    "lung": "肺 (Lung)",
    "heart": "心脏 (Heart)",
    "kidney": "肾脏 (Kidney)",
    "muscle": "肌肉 (Muscle)",
    "lymph_nodes": "淋巴结 (Lymph Nodes)",
}
# Administration routes offered in the UI.
AVAILABLE_ROUTES = [
    "intravenous",
    "intramuscular",
]
# Display labels keyed by route id.
ROUTE_LABELS = {
    "intravenous": "静脉注射 (Intravenous)",
    "intramuscular": "肌肉注射 (Intramuscular)",
}
# ============ Page configuration ============
st.set_page_config(
    page_title="LNP 配方优化",
    page_icon="🧬",
    layout="wide",
    initial_sidebar_state="expanded",
)
# ============ 自定义样式 ============
st.markdown("""
<style>
/* 主标题样式 */
.main-title {
font-size: 2.5rem;
font-weight: 700;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
text-align: center;
margin-bottom: 0.5rem;
}
/* 副标题样式 */
.sub-title {
font-size: 1.1rem;
color: #6c757d;
text-align: center;
margin-bottom: 2rem;
}
/* 结果卡片 */
.result-card {
background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
border-radius: 12px;
padding: 1.5rem;
margin-bottom: 1rem;
}
/* 指标高亮 */
.metric-highlight {
font-size: 2rem;
font-weight: 700;
color: #667eea;
}
/* 侧边栏样式 */
.sidebar-section {
background: #f8f9fa;
border-radius: 8px;
padding: 1rem;
margin-bottom: 1rem;
}
/* 状态指示器 */
.status-online {
color: #28a745;
font-weight: 600;
}
.status-offline {
color: #dc3545;
font-weight: 600;
}
/* 表格样式优化 */
.dataframe {
font-size: 0.85rem;
}
</style>
""", unsafe_allow_html=True)
# ============ 辅助函数 ============
def check_api_status() -> bool:
    """Return True if the backend answers GET {API_URL}/ with HTTP 200.

    Any failure (connection error, timeout, etc.) is treated as "offline"
    and returns False.
    """
    try:
        with httpx.Client(timeout=5) as client:
            response = client.get(f"{API_URL}/")
            return response.status_code == 200
    except Exception:
        # Fix: the original bare `except:` also swallowed KeyboardInterrupt and
        # SystemExit; catching Exception keeps Ctrl-C / shutdown working.
        return False
def call_optimize_api(
    smiles: str,
    organ: str,
    top_k: int = 20,
    num_seeds: int = None,
    top_per_seed: int = 1,
    step_sizes: list = None,
    comp_ranges: dict = None,
    routes: list = None,
    scoring_weights: dict = None,
) -> dict:
    """调用优化 API

    POSTs the parameters (mirroring the API's OptimizeRequest schema) to
    {API_URL}/optimize and returns the decoded JSON body. None values let the
    backend apply its own defaults. Raises httpx.HTTPStatusError on a non-2xx
    response (via raise_for_status).
    """
    payload = {
        "smiles": smiles,
        "organ": organ,
        "top_k": top_k,
        "num_seeds": num_seeds,
        "top_per_seed": top_per_seed,
        "step_sizes": step_sizes,
        "comp_ranges": comp_ranges,
        "routes": routes,
        "scoring_weights": scoring_weights,
    }
    # 10-minute timeout: custom parameter combinations may need a long search.
    with httpx.Client(timeout=600) as client:
        response = client.post(
            f"{API_URL}/optimize",
            json=payload,
        )
        response.raise_for_status()
        return response.json()
# PDI class labels shown in the results table.
PDI_CLASS_LABELS = {
    0: "<0.2 (优)",
    1: "0.2-0.3 (良)",
    2: "0.3-0.4 (中)",
    3: ">0.4 (差)",
}
# Encapsulation-efficiency class labels.
# NOTE(review): these thresholds (<50%, 50-80%, >80%) disagree with the comment
# in app/api.py (<80%, 80-90%, >90%) — confirm which binning the model uses.
EE_CLASS_LABELS = {
    0: "<50% (低)",
    1: "50-80% (中)",
    2: ">80% (高)",
}
# Toxicity class labels.
TOXIC_CLASS_LABELS = {
    0: "无毒 ✓",
    1: "有毒 ⚠",
}
def format_results_dataframe(results: dict, smiles_label: str = None) -> pd.DataFrame:
    """Convert an /optimize API response into a display-ready DataFrame.

    One row per formulation, in API rank order. Optional fields
    (composite_score, quantified_delivery, size, class predictions) only get a
    column when present; when smiles_label is given it becomes the first column.
    """
    target_organ = results["target_organ"]
    records = []
    for item in results["formulations"]:
        record = {}
        # SMILES label first, when provided.
        if smiles_label:
            record["SMILES"] = smiles_label
        record["排名"] = item["rank"]
        # Composite score directly after the rank, when present.
        score = item.get("composite_score")
        if score is not None:
            record["综合评分"] = f"{score:.4f}"
        # Target-organ biodistribution plus the formulation composition.
        record[f"{target_organ}分布"] = f"{item['target_biodist'] * 100:.8f}%"
        record["阳离子脂质/mRNA比例"] = item["cationic_lipid_to_mrna_ratio"]
        record["阳离子脂质(mol)比例"] = item["cationic_lipid_mol_ratio"]
        record["磷脂(mol)比例"] = item["phospholipid_mol_ratio"]
        record["胆固醇(mol)比例"] = item["cholesterol_mol_ratio"]
        record["PEG脂质(mol)比例"] = item["peg_lipid_mol_ratio"]
        record["辅助脂质"] = item["helper_lipid"]
        record["给药途径"] = item["route"]
        # Auxiliary predictions, each only when present.
        delivery = item.get("quantified_delivery")
        if delivery is not None:
            record["量化递送"] = f"{delivery:.4f}"
        particle_size = item.get("size")
        if particle_size is not None:
            record["粒径(nm)"] = f"{particle_size:.1f}"
        if item.get("pdi_class") is not None:
            record["PDI"] = PDI_CLASS_LABELS.get(item["pdi_class"], str(item["pdi_class"]))
        if item.get("ee_class") is not None:
            record["包封率"] = EE_CLASS_LABELS.get(item["ee_class"], str(item["ee_class"]))
        if item.get("toxic_class") is not None:
            record["毒性"] = TOXIC_CLASS_LABELS.get(item["toxic_class"], str(item["toxic_class"]))
        # Biodistribution for every organ other than the target.
        for organ_name, fraction in item["all_biodist"].items():
            if organ_name != target_organ:
                record[f"{organ_name}分布"] = f"{fraction * 100:.2f}%"
        records.append(record)
    return pd.DataFrame(records)
def create_export_csv(df: pd.DataFrame, smiles: str, organ: str) -> str:
    """Build the downloadable CSV payload for the results table.

    A commented metadata header (SMILES, target organ, export timestamp) is
    prepended, followed by a blank line and the table serialized without the index.
    """
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    header = (
        f"# LNP 配方优化结果\n"
        f"# SMILES: {smiles}\n"
        f"# 目标器官: {organ}\n"
        f"# 导出时间: {timestamp}\n\n"
    )
    return header + df.to_csv(index=False)
# ============ Main UI ============
def main():
    """Render the Streamlit single-page app.

    Flow: sidebar collects the SMILES input, target organ, routes and
    advanced search/scoring knobs; pressing the button calls the
    optimisation API once per SMILES; results are cached in
    ``st.session_state`` (so they survive Streamlit reruns) and rendered
    as metrics, a table, and a CSV download.
    """
    # Page title
    st.markdown('<h1 class="main-title">🧬 LNP 配方优化系统</h1>', unsafe_allow_html=True)
    st.markdown('<p class="sub-title">基于深度学习的脂质纳米颗粒配方智能优选</p>', unsafe_allow_html=True)
    # Probe the backend once per rerun; drives the status badge and
    # whether the launch button is enabled.
    api_online = check_api_status()
    # ========== Sidebar ==========
    with st.sidebar:
        # st.header("⚙️ 参数设置")
        # API status indicator
        if api_online:
            st.success("🟢 API 服务在线")
        else:
            st.error("🔴 API 服务离线")
            st.info("请先启动 API 服务:\n```\nuvicorn app.api:app --port 8000\n```")
        # st.divider()
        # SMILES input (comma-separated for batch mode)
        st.subheader("🔬 分子结构")
        smiles_input = st.text_area(
            "输入阳离子脂质 SMILES",
            value="",
            height=100,
            placeholder="例如: CC(C)NCCNC(C)C\n多条SMILES用英文逗号分隔: SMI1,SMI2,SMI3",
            help="输入阳离子脂质的 SMILES 字符串。支持多条 SMILES用英文逗号 (,) 分隔",
        )
        # Example SMILES shortcuts (currently disabled)
        # with st.expander("📋 示例 SMILES"):
        #     example_smiles = {
        #         "DLin-MC3-DMA": "CC(C)=CCCC(C)=CCCC(C)=CCN(C)CCCCCCCCOC(=O)CCCCCCC/C=C\\CCCCCCCC",
        #         "简单胺": "CC(C)NCCNC(C)C",
        #         "长链胺": "CCCCCCCCCCCCNCCNCCCCCCCCCCCC",
        #     }
        #     for name, smi in example_smiles.items():
        #         if st.button(f"使用 {name}", key=f"example_{name}"):
        #             st.session_state["smiles_input"] = smi
        #             st.rerun()
        # st.divider()
        # Target-organ selection
        st.subheader("🎯 目标器官")
        selected_organ = st.selectbox(
            "选择优化目标器官",
            options=AVAILABLE_ORGANS,
            format_func=lambda x: ORGAN_LABELS.get(x, x),
            index=0,
        )
        # Administration-route selection (multi-select, at least one required)
        st.subheader("💉 给药途径")
        selected_routes = st.multiselect(
            "选择给药途径",
            options=AVAILABLE_ROUTES,
            default=AVAILABLE_ROUTES,
            format_func=lambda x: ROUTE_LABELS.get(x, x),
            help="选择要搜索的给药途径,可多选。至少选择一种。",
        )
        if not selected_routes:
            st.warning("⚠️ 请至少选择一种给药途径")
        # Advanced options
        with st.expander("🔧 高级选项"):
            st.markdown("**输出设置**")
            top_k = st.slider(
                "返回配方数量 (top_k)",
                min_value=5,
                max_value=100,
                value=20,
                step=5,
                help="最终返回的最优配方数量",
            )
            st.markdown("**搜索策略**")
            num_seeds = st.slider(
                "种子点数量 (num_seeds)",
                min_value=10,
                max_value=200,
                value=top_k * 5,
                step=10,
                help="第一轮迭代后保留的种子点数量,更多种子点意味着更广泛的搜索",
            )
            top_per_seed = st.slider(
                "每个种子的局部最优数 (top_per_seed)",
                min_value=1,
                max_value=5,
                value=1,
                step=1,
                help="后续迭代中,每个种子点邻域保留的局部最优数量",
            )
            st.markdown("**迭代步长与轮数**")
            use_custom_steps = st.checkbox(
                "自定义迭代步长",
                value=False,
                help="默认步长为 [0.10, 0.02, 0.01]共3轮逐步精细化搜索。将某轮步长设为0可减少迭代轮数。",
            )
            if use_custom_steps:
                col1, col2, col3 = st.columns(3)
                with col1:
                    step1 = st.number_input(
                        "第1轮步长",
                        min_value=0.01, max_value=0.20, value=0.10,
                        step=0.01, format="%.2f",
                        help="第1轮为全局粗搜索步长必须大于0",
                    )
                with col2:
                    step2 = st.number_input(
                        "第2轮步长",
                        min_value=0.00, max_value=0.10, value=0.02,
                        step=0.01, format="%.2f",
                        help="设为0则只进行1轮搜索",
                    )
                with col3:
                    step3 = st.number_input(
                        "第3轮步长",
                        min_value=0.00, max_value=0.05, value=0.01,
                        step=0.01, format="%.2f",
                    help="设为0则只进行2轮搜索",
                    )
                # Build the actual step_sizes list from the widget values:
                # step2 == 0 -> keep only [step1] (1 round)
                # step3 == 0 -> keep [step1, step2] (2 rounds)
                # otherwise  -> [step1, step2, step3] (3 rounds)
                if step2 == 0.0:
                    step_sizes = [step1]
                elif step3 == 0.0:
                    step_sizes = [step1, step2]
                else:
                    step_sizes = [step1, step2, step3]
                # Show how many rounds will actually run
                st.caption(f"📌 实际迭代轮数: {len(step_sizes)} 轮,步长: {step_sizes}")
            else:
                step_sizes = None  # use server-side defaults
            st.markdown("**组分范围限制**")
            use_custom_ranges = st.checkbox(
                "自定义组分取值范围",
                value=False,
                help="限制各组分的取值范围mol 比例加起来仍为 100%",
            )
            if use_custom_ranges:
                st.caption("阳离子脂质/mRNA 重量比")
                col1, col2 = st.columns(2)
                with col1:
                    weight_ratio_min = st.number_input("最小", min_value=0.01, max_value=0.50, value=0.05, step=0.01, format="%.2f", key="wr_min")
                with col2:
                    weight_ratio_max = st.number_input("最大", min_value=0.01, max_value=0.50, value=0.30, step=0.01, format="%.2f", key="wr_max")
                st.caption("阳离子脂质 mol 比例")
                col1, col2 = st.columns(2)
                with col1:
                    cationic_mol_min = st.number_input("最小", min_value=0.00, max_value=1.00, value=0.05, step=0.05, format="%.2f", key="cat_min")
                with col2:
                    cationic_mol_max = st.number_input("最大", min_value=0.00, max_value=1.00, value=0.80, step=0.05, format="%.2f", key="cat_max")
                st.caption("磷脂 mol 比例")
                col1, col2 = st.columns(2)
                with col1:
                    phospholipid_mol_min = st.number_input("最小", min_value=0.00, max_value=1.00, value=0.00, step=0.05, format="%.2f", key="phos_min")
                with col2:
                    phospholipid_mol_max = st.number_input("最大", min_value=0.00, max_value=1.00, value=0.80, step=0.05, format="%.2f", key="phos_max")
                st.caption("胆固醇 mol 比例")
                col1, col2 = st.columns(2)
                with col1:
                    cholesterol_mol_min = st.number_input("最小", min_value=0.00, max_value=1.00, value=0.00, step=0.05, format="%.2f", key="chol_min")
                with col2:
                    cholesterol_mol_max = st.number_input("最大", min_value=0.00, max_value=1.00, value=0.80, step=0.05, format="%.2f", key="chol_max")
                st.caption("PEG 脂质 mol 比例")
                col1, col2 = st.columns(2)
                with col1:
                    peg_mol_min = st.number_input("最小", min_value=0.00, max_value=0.20, value=0.00, step=0.01, format="%.2f", key="peg_min")
                with col2:
                    peg_mol_max = st.number_input("最大", min_value=0.00, max_value=0.20, value=0.05, step=0.01, format="%.2f", key="peg_max")
                comp_ranges = {
                    "weight_ratio_min": weight_ratio_min,
                    "weight_ratio_max": weight_ratio_max,
                    "cationic_mol_min": cationic_mol_min,
                    "cationic_mol_max": cationic_mol_max,
                    "phospholipid_mol_min": phospholipid_mol_min,
                    "phospholipid_mol_max": phospholipid_mol_max,
                    "cholesterol_mol_min": cholesterol_mol_min,
                    "cholesterol_mol_max": cholesterol_mol_max,
                    "peg_mol_min": peg_mol_min,
                    "peg_mol_max": peg_mol_max,
                }
                # Sanity check: mol fractions must be able to sum to 1.0
                min_sum = cationic_mol_min + phospholipid_mol_min + cholesterol_mol_min + peg_mol_min
                max_sum = cationic_mol_max + phospholipid_mol_max + cholesterol_mol_max + peg_mol_max
                if min_sum > 1.0 or max_sum < 1.0:
                    st.warning("⚠️ 当前范围设置可能无法生成有效配方mol 比例需加起来为 100%")
            else:
                comp_ranges = None  # use server-side defaults
            st.markdown("**评分/排序权重**")
            use_custom_scoring = st.checkbox(
                "自定义评分权重",
                value=False,
                help="默认仅按目标器官分布排序。开启后可自定义多目标加权评分,总分 = 各项score之和。",
            )
            if use_custom_scoring:
                st.caption("**回归任务权重**")
                sw_biodist = st.number_input(
                    "器官分布 (Biodistribution)",
                    min_value=0.00, max_value=10.00, value=0.30,
                    step=0.05, format="%.2f", key="sw_biodist",
                    help="score = biodist_value × weight",
                )
                sw_delivery = st.number_input(
                    "量化递送 (Quantified Delivery)",
                    min_value=0.00, max_value=10.00, value=0.25,
                    step=0.05, format="%.2f", key="sw_delivery",
                    help="score = normalize(delivery, route) × weight",
                )
                sw_size = st.number_input(
                    "粒径 (Size, 80-150nm)",
                    min_value=0.00, max_value=10.00, value=0.05,
                    step=0.05, format="%.2f", key="sw_size",
                    help="score = (1 if 60≤size≤150 else 0) × weight",
                )
                st.caption("**包封率 (EE) 分类权重**")
                col1, col2, col3 = st.columns(3)
                with col1:
                    sw_ee0 = st.number_input("<50% (低)", min_value=0.00, max_value=1.00, value=0.00, step=0.01, format="%.2f", key="sw_ee0")
                with col2:
                    sw_ee1 = st.number_input("50-80% (中)", min_value=0.00, max_value=1.00, value=0.02, step=0.01, format="%.2f", key="sw_ee1")
                with col3:
                    sw_ee2 = st.number_input(">80% (高)", min_value=0.00, max_value=1.00, value=0.08, step=0.01, format="%.2f", key="sw_ee2")
                st.caption("**PDI 分类权重**")
                col1, col2, col3, col4 = st.columns(4)
                with col1:
                    sw_pdi0 = st.number_input("<0.2 (优)", min_value=0.00, max_value=1.00, value=0.08, step=0.01, format="%.2f", key="sw_pdi0")
                with col2:
                    sw_pdi1 = st.number_input("0.2-0.3 (良)", min_value=0.00, max_value=1.00, value=0.02, step=0.01, format="%.2f", key="sw_pdi1")
                with col3:
                    sw_pdi2 = st.number_input("0.3-0.4 (中)", min_value=0.00, max_value=1.00, value=0.00, step=0.01, format="%.2f", key="sw_pdi2")
                with col4:
                    sw_pdi3 = st.number_input(">0.4 (差)", min_value=0.00, max_value=1.00, value=0.00, step=0.01, format="%.2f", key="sw_pdi3")
                st.caption("**毒性分类权重**")
                col1, col2 = st.columns(2)
                with col1:
                    sw_toxic0 = st.number_input("无毒", min_value=0.00, max_value=1.00, value=0.20, step=0.05, format="%.2f", key="sw_toxic0")
                with col2:
                    sw_toxic1 = st.number_input("有毒", min_value=0.00, max_value=1.00, value=0.00, step=0.05, format="%.2f", key="sw_toxic1")
                scoring_weights = {
                    "biodist_weight": sw_biodist,
                    "delivery_weight": sw_delivery,
                    "size_weight": sw_size,
                    "ee_class_weights": [sw_ee0, sw_ee1, sw_ee2],
                    "pdi_class_weights": [sw_pdi0, sw_pdi1, sw_pdi2, sw_pdi3],
                    "toxic_class_weights": [sw_toxic0, sw_toxic1],
                }
            else:
                scoring_weights = None  # default: rank by target biodist only
        st.divider()
        # Launch button — disabled while offline / input missing
        optimize_button = st.button(
            "🚀 开始配方优选",
            type="primary",
            use_container_width=True,
            disabled=not api_online or not smiles_input.strip() or not selected_routes,
        )
    # ========== Main content area ==========
    # Persist results in session state so they survive Streamlit reruns.
    if "results" not in st.session_state:
        st.session_state["results"] = None
    if "results_df" not in st.session_state:
        st.session_state["results_df"] = None
    # Run the optimisation
    if optimize_button and smiles_input.strip():
        # Split comma-separated input into individual SMILES.
        smiles_list = [s.strip() for s in smiles_input.split(",") if s.strip()]
        if not smiles_list:
            st.error("❌ 请输入有效的 SMILES 字符串")
        else:
            is_multi_smiles = len(smiles_list) > 1
            all_results = []
            all_dfs = []
            errors = []
            # Progress indicators (one API call per SMILES)
            progress_bar = st.progress(0)
            status_text = st.empty()
            for idx, smiles in enumerate(smiles_list):
                status_text.text(f"🔄 正在优化 SMILES {idx + 1}/{len(smiles_list)}...")
                progress_bar.progress((idx) / len(smiles_list))
                try:
                    results = call_optimize_api(
                        smiles=smiles,
                        organ=selected_organ,
                        top_k=top_k,
                        num_seeds=num_seeds,
                        top_per_seed=top_per_seed,
                        step_sizes=step_sizes,
                        comp_ranges=comp_ranges,
                        routes=selected_routes,
                        scoring_weights=scoring_weights,
                    )
                    all_results.append({"smiles": smiles, "results": results})
                    # In multi-SMILES mode, tag rows with a (truncated) label
                    smiles_label = smiles[:30] + "..." if len(smiles) > 30 else smiles
                    df = format_results_dataframe(results, smiles_label if is_multi_smiles else None)
                    all_dfs.append(df)
                except httpx.HTTPStatusError as e:
                    try:
                        error_detail = e.response.json().get("detail", str(e))
                    except:
                        error_detail = str(e)
                    errors.append(f"SMILES {idx + 1}: {error_detail}")
                except httpx.RequestError as e:
                    errors.append(f"SMILES {idx + 1}: API 连接失败 - {e}")
                except Exception as e:
                    errors.append(f"SMILES {idx + 1}: {e}")
            progress_bar.progress(1.0)
            status_text.empty()
            progress_bar.empty()
            # Surface per-SMILES errors (a failed item doesn't abort the batch)
            for err in errors:
                st.error(f"{err}")
            # Persist whatever succeeded
            if all_results:
                st.session_state["results"] = all_results[0]["results"] if len(all_results) == 1 else all_results
                st.session_state["results_df"] = pd.concat(all_dfs, ignore_index=True) if all_dfs else None
                st.session_state["smiles_used"] = smiles_list
                st.session_state["organ_used"] = selected_organ
                st.session_state["is_multi_smiles"] = is_multi_smiles
                st.success(f"✅ 优化完成!成功处理 {len(all_results)}/{len(smiles_list)} 条 SMILES")
    # Render results (from session state, so this also runs after a rerun)
    if st.session_state["results"] is not None and st.session_state["results_df"] is not None:
        results = st.session_state["results"]
        df = st.session_state["results_df"]
        is_multi_smiles = st.session_state.get("is_multi_smiles", False)
        # Overview metrics
        if is_multi_smiles:
            # Multi-SMILES mode: `results` is a list of per-SMILES payloads
            col1, col2, col3 = st.columns(3)
            with col1:
                # Target organ taken from the first result
                first_result = results[0]["results"] if isinstance(results, list) else results
                target_organ = first_result["target_organ"]
                st.metric(
                    "目标器官",
                    ORGAN_LABELS.get(target_organ, target_organ).split(" ")[0],
                )
            with col2:
                st.metric(
                    "SMILES 数量",
                    len(results) if isinstance(results, list) else 1,
                )
            with col3:
                st.metric(
                    "总配方数",
                    len(df),
                )
        else:
            # Single-SMILES mode: `results` is one payload dict
            col1, col2, col3 = st.columns(3)
            with col1:
                st.metric(
                    "目标器官",
                    ORGAN_LABELS.get(results["target_organ"], results["target_organ"]).split(" ")[0],
                )
            with col2:
                best_score = results["formulations"][0]["target_biodist"]
                st.metric(
                    "最优分布",
                    f"{best_score*100:.2f}%",
                )
            with col3:
                st.metric(
                    "优选配方数",
                    len(results["formulations"]),
                )
        st.divider()
        # Results table
        st.subheader("📊 优选配方列表")
        # Export button row
        col_export, col_spacer = st.columns([1, 4])
        with col_export:
            smiles_used = st.session_state.get("smiles_used", "")
            if isinstance(smiles_used, list):
                smiles_used = ",".join(smiles_used)
            csv_content = create_export_csv(
                df,
                smiles_used,
                st.session_state.get("organ_used", ""),
            )
            # Resolve target organ for the download file name
            if is_multi_smiles:
                target_organ = results[0]["results"]["target_organ"] if isinstance(results, list) else results["target_organ"]
            else:
                target_organ = results["target_organ"]
            st.download_button(
                label="📥 导出 CSV",
                data=csv_content,
                file_name=f"lnp_optimization_{target_organ}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
                mime="text/csv",
            )
        # Render the table
        st.dataframe(
            df,
            use_container_width=True,
            hide_index=True,
            height=600,
        )
        # Best-formulation detail panel (currently disabled)
        # with st.expander("🔍 查看最优配方详情"):
        #     best = results["formulations"][0]
        #     col1, col2 = st.columns(2)
        #     with col1:
        #         st.markdown("**配方参数**")
        #         st.json({
        #             "阳离子脂质/mRNA 比例": best["cationic_lipid_to_mrna_ratio"],
        #             "阳离子脂质 (mol%)": best["cationic_lipid_mol_ratio"],
        #             "磷脂 (mol%)": best["phospholipid_mol_ratio"],
        #             "胆固醇 (mol%)": best["cholesterol_mol_ratio"],
        #             "PEG 脂质 (mol%)": best["peg_lipid_mol_ratio"],
        #             "辅助脂质": best["helper_lipid"],
        #             "给药途径": best["route"],
        #         })
        #     with col2:
        #         st.markdown("**各器官 Biodistribution 预测**")
        #         biodist_df = pd.DataFrame([
        #             {"器官": ORGAN_LABELS.get(k, k), "Biodistribution": f"{v:.4f}"}
        #             for k, v in best["all_biodist"].items()
        #         ])
        #         st.dataframe(biodist_df, hide_index=True, use_container_width=True)
    else:
        # Welcome message shown before the first run
        st.info("👈 请在左侧输入 SMILES 并选择目标器官,然后点击「开始配方优选」")
        # Usage instructions (currently disabled)
        # with st.expander("📖 使用说明"):
        #     st.markdown("""
        #     ### 如何使用
        #     1. **输入 SMILES**: 在左侧输入框中输入阳离子脂质的 SMILES 字符串
        #     2. **选择目标器官**: 选择您希望优化的器官靶向
        #     3. **点击优选**: 系统将自动搜索最优配方组合
        #     4. **查看结果**: 右侧将显示 Top-20 优选配方
        #     5. **导出数据**: 点击导出按钮将结果保存为 CSV 文件
        #     ### 优化参数
        #     系统会优化以下配方参数:
        #     - **阳离子脂质/mRNA 比例**: 0.05 - 0.30
        #     - **阳离子脂质 mol 比例**: 0.05 - 0.80
        #     - **磷脂 mol 比例**: 0.00 - 0.80
        #     - **胆固醇 mol 比例**: 0.00 - 0.80
        #     - **PEG 脂质 mol 比例**: 0.00 - 0.05
        #     - **辅助脂质**: DOPE / DSPC / DOTAP
        #     - **给药途径**: 静脉注射 / 肌肉注射
        #     ### 约束条件
        #     mol 比例之和 = 1 (阳离子脂质 + 磷脂 + 胆固醇 + PEG 脂质)
        #     """)
# Script entry point (run via `streamlit run <this file>`).
if __name__ == "__main__":
    main()

1016
app/optimize.py Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,165 @@
{
"activation": "ReLU",
"adding_bond_types": true,
"adding_h": false,
"aggregation": "mean",
"aggregation_norm": 100,
"atom_constraints": [],
"atom_descriptor_scaling": true,
"atom_descriptors": null,
"atom_descriptors_path": null,
"atom_descriptors_size": 0,
"atom_features_size": 0,
"atom_messages": false,
"atom_targets": [],
"batch_size": 50,
"bias": false,
"bias_solvent": false,
"bond_constraints": [],
"bond_descriptor_scaling": true,
"bond_descriptors": null,
"bond_descriptors_path": null,
"bond_descriptors_size": 0,
"bond_features_size": 0,
"bond_targets": [],
"cache_cutoff": 10000,
"checkpoint_dir": null,
"checkpoint_frzn": null,
"checkpoint_path": null,
"checkpoint_paths": null,
"class_balance": false,
"config_path": "../data/args_files/optimized_configs.json",
"constraints_path": null,
"crossval_index_dir": null,
"crossval_index_file": null,
"crossval_index_sets": null,
"cuda": true,
"data_path": "../data/crossval_splits/all_amine_split_for_paper/cv_0/train.csv",
"data_weights_path": "../data/crossval_splits/all_amine_split_for_paper/cv_0/train_weights.csv",
"dataset_type": "regression",
"depth": 4,
"depth_solvent": 3,
"device": {
"_string": "cuda",
"_type": "python_object (type = device)",
"_value": "gASVHwAAAAAAAACMBXRvcmNolIwGZGV2aWNllJOUjARjdWRhlIWUUpQu"
},
"dropout": 0.1,
"empty_cache": false,
"ensemble_size": 1,
"epochs": 50,
"evidential_regularization": 0,
"explicit_h": false,
"extra_metrics": [],
"features_generator": null,
"features_only": false,
"features_path": [
"../data/crossval_splits/all_amine_split_for_paper/cv_0/train_extra_x.csv"
],
"features_scaling": true,
"features_size": null,
"ffn_hidden_size": 600,
"ffn_num_layers": 3,
"final_lr": 0.0001,
"folds_file": null,
"freeze_first_only": false,
"frzn_ffn_layers": 0,
"gpu": null,
"grad_clip": null,
"hidden_size": 600,
"hidden_size_solvent": 300,
"ignore_columns": null,
"ignore_nan_metrics": false,
"init_lr": 0.0001,
"is_atom_bond_targets": false,
"keeping_atom_map": false,
"log_frequency": 10,
"loss_function": "mse",
"max_data_size": null,
"max_lr": 0.001,
"metric": "rmse",
"metrics": [
"rmse"
],
"minimize_score": true,
"mpn_shared": false,
"multiclass_num_classes": 3,
"no_adding_bond_types": false,
"no_atom_descriptor_scaling": false,
"no_bond_descriptor_scaling": false,
"no_cache_mol": false,
"no_cuda": false,
"no_features_scaling": false,
"no_shared_atom_bond_ffn": false,
"num_folds": 1,
"num_lrs": 1,
"num_tasks": 1,
"num_workers": 8,
"number_of_molecules": 1,
"overwrite_default_atom_features": false,
"overwrite_default_bond_features": false,
"phase_features_path": null,
"pytorch_seed": 0,
"quantile_loss_alpha": 0.1,
"quantiles": [],
"quiet": false,
"reaction": false,
"reaction_mode": "reac_diff",
"reaction_solvent": false,
"reproducibility": {
"command_line": "python main_script.py train all_amine_split_for_paper",
"git_has_uncommitted_changes": true,
"git_root": "/media/andersonxps/wd_4tb/evan/LNP_ML",
"git_url": "https://github.com/jswitten/LNP_ML/tree/167822980dc26ba65c5c14539c4ce12b81b0b8f3",
"time": "Tue Jul 30 10:15:25 2024"
},
"resume_experiment": false,
"save_dir": "../data/crossval_splits/all_amine_split_for_paper/cv_0",
"save_preds": false,
"save_smiles_splits": false,
"seed": 42,
"separate_test_atom_descriptors_path": null,
"separate_test_bond_descriptors_path": null,
"separate_test_constraints_path": null,
"separate_test_features_path": [
"../data/crossval_splits/all_amine_split_for_paper/cv_0/test_extra_x.csv"
],
"separate_test_path": "../data/crossval_splits/all_amine_split_for_paper/cv_0/test.csv",
"separate_test_phase_features_path": null,
"separate_val_atom_descriptors_path": null,
"separate_val_bond_descriptors_path": null,
"separate_val_constraints_path": null,
"separate_val_features_path": [
"../data/crossval_splits/all_amine_split_for_paper/cv_0/valid_extra_x.csv"
],
"separate_val_path": "../data/crossval_splits/all_amine_split_for_paper/cv_0/valid.csv",
"separate_val_phase_features_path": null,
"shared_atom_bond_ffn": true,
"show_individual_scores": false,
"smiles_columns": [
"smiles"
],
"spectra_activation": "exp",
"spectra_phase_mask_path": null,
"spectra_target_floor": 1e-08,
"split_key_molecule": 0,
"split_sizes": [
1.0,
0.0,
0.0
],
"split_type": "random",
"target_columns": null,
"target_weights": null,
"task_names": [
"quantified_delivery"
],
"test": false,
"test_fold_index": null,
"train_data_size": null,
"undirected": false,
"use_input_features": true,
"val_fold_index": null,
"warmup_epochs": 2.0,
"weights_ffn_num_layers": 2
}

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3b6dcfdd1b82a685b007ed06ad323defdb7486b24917c1ec515dbd2c5b927f08
size 6540631

View File

@ -0,0 +1,5 @@
{
"rmse": [
0.8880622451903801
]
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,2 @@
Task,Mean rmse,Standard deviation rmse,Fold 0 rmse
quantified_delivery,0.8880622451903801,0.0,0.8880622451903801
1 Task Mean rmse Standard deviation rmse Fold 0 rmse
2 quantified_delivery 0.8880622451903801 0.0 0.8880622451903801

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,165 @@
{
"activation": "ReLU",
"adding_bond_types": true,
"adding_h": false,
"aggregation": "mean",
"aggregation_norm": 100,
"atom_constraints": [],
"atom_descriptor_scaling": true,
"atom_descriptors": null,
"atom_descriptors_path": null,
"atom_descriptors_size": 0,
"atom_features_size": 0,
"atom_messages": false,
"atom_targets": [],
"batch_size": 50,
"bias": false,
"bias_solvent": false,
"bond_constraints": [],
"bond_descriptor_scaling": true,
"bond_descriptors": null,
"bond_descriptors_path": null,
"bond_descriptors_size": 0,
"bond_features_size": 0,
"bond_targets": [],
"cache_cutoff": 10000,
"checkpoint_dir": null,
"checkpoint_frzn": null,
"checkpoint_path": null,
"checkpoint_paths": null,
"class_balance": false,
"config_path": "../data/args_files/optimized_configs.json",
"constraints_path": null,
"crossval_index_dir": null,
"crossval_index_file": null,
"crossval_index_sets": null,
"cuda": true,
"data_path": "../data/crossval_splits/all_amine_split_for_paper/cv_1/train.csv",
"data_weights_path": "../data/crossval_splits/all_amine_split_for_paper/cv_1/train_weights.csv",
"dataset_type": "regression",
"depth": 4,
"depth_solvent": 3,
"device": {
"_string": "cuda",
"_type": "python_object (type = device)",
"_value": "gASVHwAAAAAAAACMBXRvcmNolIwGZGV2aWNllJOUjARjdWRhlIWUUpQu"
},
"dropout": 0.1,
"empty_cache": false,
"ensemble_size": 1,
"epochs": 50,
"evidential_regularization": 0,
"explicit_h": false,
"extra_metrics": [],
"features_generator": null,
"features_only": false,
"features_path": [
"../data/crossval_splits/all_amine_split_for_paper/cv_1/train_extra_x.csv"
],
"features_scaling": true,
"features_size": null,
"ffn_hidden_size": 600,
"ffn_num_layers": 3,
"final_lr": 0.0001,
"folds_file": null,
"freeze_first_only": false,
"frzn_ffn_layers": 0,
"gpu": null,
"grad_clip": null,
"hidden_size": 600,
"hidden_size_solvent": 300,
"ignore_columns": null,
"ignore_nan_metrics": false,
"init_lr": 0.0001,
"is_atom_bond_targets": false,
"keeping_atom_map": false,
"log_frequency": 10,
"loss_function": "mse",
"max_data_size": null,
"max_lr": 0.001,
"metric": "rmse",
"metrics": [
"rmse"
],
"minimize_score": true,
"mpn_shared": false,
"multiclass_num_classes": 3,
"no_adding_bond_types": false,
"no_atom_descriptor_scaling": false,
"no_bond_descriptor_scaling": false,
"no_cache_mol": false,
"no_cuda": false,
"no_features_scaling": false,
"no_shared_atom_bond_ffn": false,
"num_folds": 1,
"num_lrs": 1,
"num_tasks": 1,
"num_workers": 8,
"number_of_molecules": 1,
"overwrite_default_atom_features": false,
"overwrite_default_bond_features": false,
"phase_features_path": null,
"pytorch_seed": 0,
"quantile_loss_alpha": 0.1,
"quantiles": [],
"quiet": false,
"reaction": false,
"reaction_mode": "reac_diff",
"reaction_solvent": false,
"reproducibility": {
"command_line": "python main_script.py train all_amine_split_for_paper",
"git_has_uncommitted_changes": true,
"git_root": "/media/andersonxps/wd_4tb/evan/LNP_ML",
"git_url": "https://github.com/jswitten/LNP_ML/tree/167822980dc26ba65c5c14539c4ce12b81b0b8f3",
"time": "Tue Jul 30 10:21:40 2024"
},
"resume_experiment": false,
"save_dir": "../data/crossval_splits/all_amine_split_for_paper/cv_1",
"save_preds": false,
"save_smiles_splits": false,
"seed": 42,
"separate_test_atom_descriptors_path": null,
"separate_test_bond_descriptors_path": null,
"separate_test_constraints_path": null,
"separate_test_features_path": [
"../data/crossval_splits/all_amine_split_for_paper/cv_1/test_extra_x.csv"
],
"separate_test_path": "../data/crossval_splits/all_amine_split_for_paper/cv_1/test.csv",
"separate_test_phase_features_path": null,
"separate_val_atom_descriptors_path": null,
"separate_val_bond_descriptors_path": null,
"separate_val_constraints_path": null,
"separate_val_features_path": [
"../data/crossval_splits/all_amine_split_for_paper/cv_1/valid_extra_x.csv"
],
"separate_val_path": "../data/crossval_splits/all_amine_split_for_paper/cv_1/valid.csv",
"separate_val_phase_features_path": null,
"shared_atom_bond_ffn": true,
"show_individual_scores": false,
"smiles_columns": [
"smiles"
],
"spectra_activation": "exp",
"spectra_phase_mask_path": null,
"spectra_target_floor": 1e-08,
"split_key_molecule": 0,
"split_sizes": [
1.0,
0.0,
0.0
],
"split_type": "random",
"target_columns": null,
"target_weights": null,
"task_names": [
"quantified_delivery"
],
"test": false,
"test_fold_index": null,
"train_data_size": null,
"undirected": false,
"use_input_features": true,
"val_fold_index": null,
"warmup_epochs": 2.0,
"weights_ffn_num_layers": 2
}

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6518259648eb75f0717f93d800048f25eeb8dec9fca13d7f1c02235c2ef8bda8
size 6540631

View File

@ -0,0 +1,5 @@
{
"rmse": [
1.01673724295223
]
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,2 @@
Task,Mean rmse,Standard deviation rmse,Fold 0 rmse
quantified_delivery,1.01673724295223,0.0,1.01673724295223
1 Task Mean rmse Standard deviation rmse Fold 0 rmse
2 quantified_delivery 1.01673724295223 0.0 1.01673724295223

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,165 @@
{
"activation": "ReLU",
"adding_bond_types": true,
"adding_h": false,
"aggregation": "mean",
"aggregation_norm": 100,
"atom_constraints": [],
"atom_descriptor_scaling": true,
"atom_descriptors": null,
"atom_descriptors_path": null,
"atom_descriptors_size": 0,
"atom_features_size": 0,
"atom_messages": false,
"atom_targets": [],
"batch_size": 50,
"bias": false,
"bias_solvent": false,
"bond_constraints": [],
"bond_descriptor_scaling": true,
"bond_descriptors": null,
"bond_descriptors_path": null,
"bond_descriptors_size": 0,
"bond_features_size": 0,
"bond_targets": [],
"cache_cutoff": 10000,
"checkpoint_dir": null,
"checkpoint_frzn": null,
"checkpoint_path": null,
"checkpoint_paths": null,
"class_balance": false,
"config_path": "../data/args_files/optimized_configs.json",
"constraints_path": null,
"crossval_index_dir": null,
"crossval_index_file": null,
"crossval_index_sets": null,
"cuda": true,
"data_path": "../data/crossval_splits/all_amine_split_for_paper/cv_2/train.csv",
"data_weights_path": "../data/crossval_splits/all_amine_split_for_paper/cv_2/train_weights.csv",
"dataset_type": "regression",
"depth": 4,
"depth_solvent": 3,
"device": {
"_string": "cuda",
"_type": "python_object (type = device)",
"_value": "gASVHwAAAAAAAACMBXRvcmNolIwGZGV2aWNllJOUjARjdWRhlIWUUpQu"
},
"dropout": 0.1,
"empty_cache": false,
"ensemble_size": 1,
"epochs": 50,
"evidential_regularization": 0,
"explicit_h": false,
"extra_metrics": [],
"features_generator": null,
"features_only": false,
"features_path": [
"../data/crossval_splits/all_amine_split_for_paper/cv_2/train_extra_x.csv"
],
"features_scaling": true,
"features_size": null,
"ffn_hidden_size": 600,
"ffn_num_layers": 3,
"final_lr": 0.0001,
"folds_file": null,
"freeze_first_only": false,
"frzn_ffn_layers": 0,
"gpu": null,
"grad_clip": null,
"hidden_size": 600,
"hidden_size_solvent": 300,
"ignore_columns": null,
"ignore_nan_metrics": false,
"init_lr": 0.0001,
"is_atom_bond_targets": false,
"keeping_atom_map": false,
"log_frequency": 10,
"loss_function": "mse",
"max_data_size": null,
"max_lr": 0.001,
"metric": "rmse",
"metrics": [
"rmse"
],
"minimize_score": true,
"mpn_shared": false,
"multiclass_num_classes": 3,
"no_adding_bond_types": false,
"no_atom_descriptor_scaling": false,
"no_bond_descriptor_scaling": false,
"no_cache_mol": false,
"no_cuda": false,
"no_features_scaling": false,
"no_shared_atom_bond_ffn": false,
"num_folds": 1,
"num_lrs": 1,
"num_tasks": 1,
"num_workers": 8,
"number_of_molecules": 1,
"overwrite_default_atom_features": false,
"overwrite_default_bond_features": false,
"phase_features_path": null,
"pytorch_seed": 0,
"quantile_loss_alpha": 0.1,
"quantiles": [],
"quiet": false,
"reaction": false,
"reaction_mode": "reac_diff",
"reaction_solvent": false,
"reproducibility": {
"command_line": "python main_script.py train all_amine_split_for_paper",
"git_has_uncommitted_changes": true,
"git_root": "/media/andersonxps/wd_4tb/evan/LNP_ML",
"git_url": "https://github.com/jswitten/LNP_ML/tree/167822980dc26ba65c5c14539c4ce12b81b0b8f3",
"time": "Tue Jul 30 10:28:04 2024"
},
"resume_experiment": false,
"save_dir": "../data/crossval_splits/all_amine_split_for_paper/cv_2",
"save_preds": false,
"save_smiles_splits": false,
"seed": 42,
"separate_test_atom_descriptors_path": null,
"separate_test_bond_descriptors_path": null,
"separate_test_constraints_path": null,
"separate_test_features_path": [
"../data/crossval_splits/all_amine_split_for_paper/cv_2/test_extra_x.csv"
],
"separate_test_path": "../data/crossval_splits/all_amine_split_for_paper/cv_2/test.csv",
"separate_test_phase_features_path": null,
"separate_val_atom_descriptors_path": null,
"separate_val_bond_descriptors_path": null,
"separate_val_constraints_path": null,
"separate_val_features_path": [
"../data/crossval_splits/all_amine_split_for_paper/cv_2/valid_extra_x.csv"
],
"separate_val_path": "../data/crossval_splits/all_amine_split_for_paper/cv_2/valid.csv",
"separate_val_phase_features_path": null,
"shared_atom_bond_ffn": true,
"show_individual_scores": false,
"smiles_columns": [
"smiles"
],
"spectra_activation": "exp",
"spectra_phase_mask_path": null,
"spectra_target_floor": 1e-08,
"split_key_molecule": 0,
"split_sizes": [
1.0,
0.0,
0.0
],
"split_type": "random",
"target_columns": null,
"target_weights": null,
"task_names": [
"quantified_delivery"
],
"test": false,
"test_fold_index": null,
"train_data_size": null,
"undirected": false,
"use_input_features": true,
"val_fold_index": null,
"warmup_epochs": 2.0,
"weights_ffn_num_layers": 2
}

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5df89f8f7b97d314e05171db5891a0fb2199f3b591dbc9d44680e77811520acb
size 6540631

View File

@ -0,0 +1,5 @@
{
"rmse": [
0.8788072588544181
]
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,2 @@
Task,Mean rmse,Standard deviation rmse,Fold 0 rmse
quantified_delivery,0.8788072588544181,0.0,0.8788072588544181
1 Task Mean rmse Standard deviation rmse Fold 0 rmse
2 quantified_delivery 0.8788072588544181 0.0 0.8788072588544181

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,165 @@
{
"activation": "ReLU",
"adding_bond_types": true,
"adding_h": false,
"aggregation": "mean",
"aggregation_norm": 100,
"atom_constraints": [],
"atom_descriptor_scaling": true,
"atom_descriptors": null,
"atom_descriptors_path": null,
"atom_descriptors_size": 0,
"atom_features_size": 0,
"atom_messages": false,
"atom_targets": [],
"batch_size": 50,
"bias": false,
"bias_solvent": false,
"bond_constraints": [],
"bond_descriptor_scaling": true,
"bond_descriptors": null,
"bond_descriptors_path": null,
"bond_descriptors_size": 0,
"bond_features_size": 0,
"bond_targets": [],
"cache_cutoff": 10000,
"checkpoint_dir": null,
"checkpoint_frzn": null,
"checkpoint_path": null,
"checkpoint_paths": null,
"class_balance": false,
"config_path": "../data/args_files/optimized_configs.json",
"constraints_path": null,
"crossval_index_dir": null,
"crossval_index_file": null,
"crossval_index_sets": null,
"cuda": true,
"data_path": "../data/crossval_splits/all_amine_split_for_paper/cv_3/train.csv",
"data_weights_path": "../data/crossval_splits/all_amine_split_for_paper/cv_3/train_weights.csv",
"dataset_type": "regression",
"depth": 4,
"depth_solvent": 3,
"device": {
"_string": "cuda",
"_type": "python_object (type = device)",
"_value": "gASVHwAAAAAAAACMBXRvcmNolIwGZGV2aWNllJOUjARjdWRhlIWUUpQu"
},
"dropout": 0.1,
"empty_cache": false,
"ensemble_size": 1,
"epochs": 50,
"evidential_regularization": 0,
"explicit_h": false,
"extra_metrics": [],
"features_generator": null,
"features_only": false,
"features_path": [
"../data/crossval_splits/all_amine_split_for_paper/cv_3/train_extra_x.csv"
],
"features_scaling": true,
"features_size": null,
"ffn_hidden_size": 600,
"ffn_num_layers": 3,
"final_lr": 0.0001,
"folds_file": null,
"freeze_first_only": false,
"frzn_ffn_layers": 0,
"gpu": null,
"grad_clip": null,
"hidden_size": 600,
"hidden_size_solvent": 300,
"ignore_columns": null,
"ignore_nan_metrics": false,
"init_lr": 0.0001,
"is_atom_bond_targets": false,
"keeping_atom_map": false,
"log_frequency": 10,
"loss_function": "mse",
"max_data_size": null,
"max_lr": 0.001,
"metric": "rmse",
"metrics": [
"rmse"
],
"minimize_score": true,
"mpn_shared": false,
"multiclass_num_classes": 3,
"no_adding_bond_types": false,
"no_atom_descriptor_scaling": false,
"no_bond_descriptor_scaling": false,
"no_cache_mol": false,
"no_cuda": false,
"no_features_scaling": false,
"no_shared_atom_bond_ffn": false,
"num_folds": 1,
"num_lrs": 1,
"num_tasks": 1,
"num_workers": 8,
"number_of_molecules": 1,
"overwrite_default_atom_features": false,
"overwrite_default_bond_features": false,
"phase_features_path": null,
"pytorch_seed": 0,
"quantile_loss_alpha": 0.1,
"quantiles": [],
"quiet": false,
"reaction": false,
"reaction_mode": "reac_diff",
"reaction_solvent": false,
"reproducibility": {
"command_line": "python main_script.py train all_amine_split_for_paper",
"git_has_uncommitted_changes": true,
"git_root": "/media/andersonxps/wd_4tb/evan/LNP_ML",
"git_url": "https://github.com/jswitten/LNP_ML/tree/167822980dc26ba65c5c14539c4ce12b81b0b8f3",
"time": "Tue Jul 30 10:34:31 2024"
},
"resume_experiment": false,
"save_dir": "../data/crossval_splits/all_amine_split_for_paper/cv_3",
"save_preds": false,
"save_smiles_splits": false,
"seed": 42,
"separate_test_atom_descriptors_path": null,
"separate_test_bond_descriptors_path": null,
"separate_test_constraints_path": null,
"separate_test_features_path": [
"../data/crossval_splits/all_amine_split_for_paper/cv_3/test_extra_x.csv"
],
"separate_test_path": "../data/crossval_splits/all_amine_split_for_paper/cv_3/test.csv",
"separate_test_phase_features_path": null,
"separate_val_atom_descriptors_path": null,
"separate_val_bond_descriptors_path": null,
"separate_val_constraints_path": null,
"separate_val_features_path": [
"../data/crossval_splits/all_amine_split_for_paper/cv_3/valid_extra_x.csv"
],
"separate_val_path": "../data/crossval_splits/all_amine_split_for_paper/cv_3/valid.csv",
"separate_val_phase_features_path": null,
"shared_atom_bond_ffn": true,
"show_individual_scores": false,
"smiles_columns": [
"smiles"
],
"spectra_activation": "exp",
"spectra_phase_mask_path": null,
"spectra_target_floor": 1e-08,
"split_key_molecule": 0,
"split_sizes": [
1.0,
0.0,
0.0
],
"split_type": "random",
"target_columns": null,
"target_weights": null,
"task_names": [
"quantified_delivery"
],
"test": false,
"test_fold_index": null,
"train_data_size": null,
"undirected": false,
"use_input_features": true,
"val_fold_index": null,
"warmup_epochs": 2.0,
"weights_ffn_num_layers": 2
}

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e5ae4ef9d7980963742eb04c54cdf5fe3a16db9d95c22db273ad072413b651b3
size 6540631

View File

@ -0,0 +1,5 @@
{
"rmse": [
0.9245934905333985
]
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,2 @@
Task,Mean rmse,Standard deviation rmse,Fold 0 rmse
quantified_delivery,0.9245934905333985,0.0,0.9245934905333985
1 Task Mean rmse Standard deviation rmse Fold 0 rmse
2 quantified_delivery 0.9245934905333985 0.0 0.9245934905333985

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,165 @@
{
"activation": "ReLU",
"adding_bond_types": true,
"adding_h": false,
"aggregation": "mean",
"aggregation_norm": 100,
"atom_constraints": [],
"atom_descriptor_scaling": true,
"atom_descriptors": null,
"atom_descriptors_path": null,
"atom_descriptors_size": 0,
"atom_features_size": 0,
"atom_messages": false,
"atom_targets": [],
"batch_size": 50,
"bias": false,
"bias_solvent": false,
"bond_constraints": [],
"bond_descriptor_scaling": true,
"bond_descriptors": null,
"bond_descriptors_path": null,
"bond_descriptors_size": 0,
"bond_features_size": 0,
"bond_targets": [],
"cache_cutoff": 10000,
"checkpoint_dir": null,
"checkpoint_frzn": null,
"checkpoint_path": null,
"checkpoint_paths": null,
"class_balance": false,
"config_path": "../data/args_files/optimized_configs.json",
"constraints_path": null,
"crossval_index_dir": null,
"crossval_index_file": null,
"crossval_index_sets": null,
"cuda": true,
"data_path": "../data/crossval_splits/all_amine_split_for_paper/cv_4/train.csv",
"data_weights_path": "../data/crossval_splits/all_amine_split_for_paper/cv_4/train_weights.csv",
"dataset_type": "regression",
"depth": 4,
"depth_solvent": 3,
"device": {
"_string": "cuda",
"_type": "python_object (type = device)",
"_value": "gASVHwAAAAAAAACMBXRvcmNolIwGZGV2aWNllJOUjARjdWRhlIWUUpQu"
},
"dropout": 0.1,
"empty_cache": false,
"ensemble_size": 1,
"epochs": 50,
"evidential_regularization": 0,
"explicit_h": false,
"extra_metrics": [],
"features_generator": null,
"features_only": false,
"features_path": [
"../data/crossval_splits/all_amine_split_for_paper/cv_4/train_extra_x.csv"
],
"features_scaling": true,
"features_size": null,
"ffn_hidden_size": 600,
"ffn_num_layers": 3,
"final_lr": 0.0001,
"folds_file": null,
"freeze_first_only": false,
"frzn_ffn_layers": 0,
"gpu": null,
"grad_clip": null,
"hidden_size": 600,
"hidden_size_solvent": 300,
"ignore_columns": null,
"ignore_nan_metrics": false,
"init_lr": 0.0001,
"is_atom_bond_targets": false,
"keeping_atom_map": false,
"log_frequency": 10,
"loss_function": "mse",
"max_data_size": null,
"max_lr": 0.001,
"metric": "rmse",
"metrics": [
"rmse"
],
"minimize_score": true,
"mpn_shared": false,
"multiclass_num_classes": 3,
"no_adding_bond_types": false,
"no_atom_descriptor_scaling": false,
"no_bond_descriptor_scaling": false,
"no_cache_mol": false,
"no_cuda": false,
"no_features_scaling": false,
"no_shared_atom_bond_ffn": false,
"num_folds": 1,
"num_lrs": 1,
"num_tasks": 1,
"num_workers": 8,
"number_of_molecules": 1,
"overwrite_default_atom_features": false,
"overwrite_default_bond_features": false,
"phase_features_path": null,
"pytorch_seed": 0,
"quantile_loss_alpha": 0.1,
"quantiles": [],
"quiet": false,
"reaction": false,
"reaction_mode": "reac_diff",
"reaction_solvent": false,
"reproducibility": {
"command_line": "python main_script.py train all_amine_split_for_paper",
"git_has_uncommitted_changes": true,
"git_root": "/media/andersonxps/wd_4tb/evan/LNP_ML",
"git_url": "https://github.com/jswitten/LNP_ML/tree/167822980dc26ba65c5c14539c4ce12b81b0b8f3",
"time": "Tue Jul 30 10:40:44 2024"
},
"resume_experiment": false,
"save_dir": "../data/crossval_splits/all_amine_split_for_paper/cv_4",
"save_preds": false,
"save_smiles_splits": false,
"seed": 42,
"separate_test_atom_descriptors_path": null,
"separate_test_bond_descriptors_path": null,
"separate_test_constraints_path": null,
"separate_test_features_path": [
"../data/crossval_splits/all_amine_split_for_paper/cv_4/test_extra_x.csv"
],
"separate_test_path": "../data/crossval_splits/all_amine_split_for_paper/cv_4/test.csv",
"separate_test_phase_features_path": null,
"separate_val_atom_descriptors_path": null,
"separate_val_bond_descriptors_path": null,
"separate_val_constraints_path": null,
"separate_val_features_path": [
"../data/crossval_splits/all_amine_split_for_paper/cv_4/valid_extra_x.csv"
],
"separate_val_path": "../data/crossval_splits/all_amine_split_for_paper/cv_4/valid.csv",
"separate_val_phase_features_path": null,
"shared_atom_bond_ffn": true,
"show_individual_scores": false,
"smiles_columns": [
"smiles"
],
"spectra_activation": "exp",
"spectra_phase_mask_path": null,
"spectra_target_floor": 1e-08,
"split_key_molecule": 0,
"split_sizes": [
1.0,
0.0,
0.0
],
"split_type": "random",
"target_columns": null,
"target_weights": null,
"task_names": [
"quantified_delivery"
],
"test": false,
"test_fold_index": null,
"train_data_size": null,
"undirected": false,
"use_input_features": true,
"val_fold_index": null,
"warmup_epochs": 2.0,
"weights_ffn_num_layers": 2
}

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c69ab5f85cbca9dac0f871b1d6841a199cf40eeba2f46173eff9654a8f59bc8d
size 6540631

View File

@ -0,0 +1,5 @@
{
"rmse": [
0.8268900471469541
]
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,2 @@
Task,Mean rmse,Standard deviation rmse,Fold 0 rmse
quantified_delivery,0.8268900471469541,0.0,0.8268900471469541
1 Task Mean rmse Standard deviation rmse Fold 0 rmse
2 quantified_delivery 0.8268900471469541 0.0 0.8268900471469541

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

Some files were not shown because too many files have changed in this diff Show More