Compare commits

..

22 Commits

Author SHA1 Message Date
RYDE
5730490642
Merge pull request #1 from RYDE-WORK/amine_split
Amine split
2026-02-11 16:55:20 +08:00
RYDE-WORK
0b90caef1d Resolve conflicts 2026-02-11 16:51:21 +08:00
RYDE-WORK
a9392aa780 Update models and UI 2026-02-11 16:49:28 +08:00
RYDE-WORK
3f33f9d233 Add lfs 2026-02-11 16:45:21 +08:00
RYDE-WORK
c225fc67a7 Dedicated docker-compose.yml for gpu deployment 2026-01-26 11:11:52 +08:00
RYDE-WORK
3cce4c9373 Dockerize 2026-01-26 11:08:57 +08:00
RYDE-WORK
68119df128 Update app.py 2026-01-26 10:33:50 +08:00
RYDE-WORK
75e1dcb0eb Add UI 2026-01-26 00:09:21 +08:00
RYDE-WORK
982e98cced Add LNP fomular optimization 2026-01-25 23:54:20 +08:00
RYDE-WORK
39a14e4274 Add final models 2026-01-25 19:19:29 +08:00
RYDE-WORK
93a6f8654d ... 2026-01-23 17:51:08 +08:00
RYDE-WORK
a56637c8ac Add loss visualization 2026-01-23 13:40:22 +08:00
RYDE-WORK
871afc5988 Add random CV split 2026-01-23 10:12:25 +08:00
RYDE-WORK
ac4246c2b7 Add train_cv(without pretrain) 2026-01-22 18:06:13 +08:00
RYDE-WORK
47bbb64c66 Add more metrics 2026-01-22 17:06:24 +08:00
RYDE-WORK
039be54c5a ... 2026-01-22 01:01:29 +08:00
RYDE-WORK
e6a5e5495a ... 2026-01-22 00:24:13 +08:00
RYDE-WORK
e123fc8f3e update pretrain ratio 2026-01-21 23:36:53 +08:00
RYDE-WORK
c392b48994 Add CV results 2026-01-21 22:57:44 +08:00
RYDE-WORK
e1c85c83ba CV results 2026-01-21 20:10:48 +08:00
RYDE-WORK
a2bfb26dfc Add CV 2026-01-21 19:35:55 +08:00
RYDE-WORK
6773929ea2 增加LiON的评估指标 2026-01-21 16:20:10 +08:00
247 changed files with 218554 additions and 2142 deletions

75
.dockerignore Normal file
View File

@ -0,0 +1,75 @@
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
*.egg-info/
.eggs/
dist/
build/
*.egg
# Virtual environments
.venv/
venv/
ENV/
env/
.pixi/
# IDE
.idea/
.vscode/
*.swp
*.swo
.cursor/
# Git
.git/
.gitignore
# Data (不需要打包到镜像)
data/
!data/.gitkeep
# Notebooks
notebooks/
*.ipynb
# Documentation
docs/
# Reports
reports/
# References
references/
# Scripts (训练脚本不需要)
scripts/
# Lock files
pixi.lock
# Tests
tests/
.pytest_cache/
# Logs
*.log
logs/
# Temporary files
*.tmp
*.temp
.DS_Store
# Models (will be mounted as volume or copied explicitly)
# Note: models/final/ is copied in Dockerfile
models/finetune_cv/
models/pretrain_cv/
models/mpnn/
models/*.pt
models/*.json
!models/final/

1
.gitattributes vendored Normal file
View File

@ -0,0 +1 @@
*.pt filter=lfs diff=lfs merge=lfs -text

63
Dockerfile Normal file
View File

@ -0,0 +1,63 @@
# LNP-ML Docker Image
# 多阶段构建,支持 API 和 Streamlit 两种服务
FROM python:3.8-slim AS base
# 设置环境变量
ENV PYTHONUNBUFFERED=1 \
PYTHONDONTWRITEBYTECODE=1 \
PIP_NO_CACHE_DIR=1 \
PIP_DISABLE_PIP_VERSION_CHECK=1
WORKDIR /app
# 安装系统依赖
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
libxrender1 \
libxext6 \
curl \
&& rm -rf /var/lib/apt/lists/*
# 复制依赖文件
COPY requirements.txt .
# 安装 Python 依赖
RUN pip install --upgrade pip && \
pip install -r requirements.txt
# 复制项目代码
COPY pyproject.toml .
COPY README.md .
COPY LICENSE .
COPY lnp_ml/ ./lnp_ml/
COPY app/ ./app/
# 安装项目包
RUN pip install -e .
# 复制模型文件
COPY models/final/ ./models/final/
# ============ API 服务 ============
FROM base AS api
EXPOSE 8000
ENV MODEL_PATH=/app/models/final/model.pt
CMD ["uvicorn", "app.api:app", "--host", "0.0.0.0", "--port", "8000"]
# ============ Streamlit 服务 ============
FROM base AS streamlit
EXPOSE 8501
# Streamlit 配置
ENV STREAMLIT_SERVER_PORT=8501 \
STREAMLIT_SERVER_ADDRESS=0.0.0.0 \
STREAMLIT_SERVER_HEADLESS=true \
STREAMLIT_BROWSER_GATHER_USAGE_STATS=false
CMD ["streamlit", "run", "app/app.py"]

179
Makefile
View File

@ -68,11 +68,29 @@ clean_data: requirements
data: requirements
$(PYTHON_INTERPRETER) scripts/process_data.py
## Process dataset for final training (interim -> processed/final, train:val=9:1, no test)
.PHONY: data_final
data_final: requirements
$(PYTHON_INTERPRETER) scripts/process_data_final.py
## Process external data for pretrain (external -> processed)
.PHONY: data_pretrain
data_pretrain: requirements
$(PYTHON_INTERPRETER) scripts/process_external.py
## Process CV data for cross-validation pretrain (external/all_amine_split_for_LiON -> processed/cv)
.PHONY: data_pretrain_cv
data_pretrain_cv: requirements
$(PYTHON_INTERPRETER) scripts/process_external_cv.py
## Process internal data with CV splitting (interim -> processed/cv)
## Use SCAFFOLD_SPLIT=1 to enable amine-based scaffold splitting (default: random shuffle)
SCAFFOLD_SPLIT_FLAG = $(if $(filter 1,$(SCAFFOLD_SPLIT)),--scaffold-split,)
.PHONY: data_cv
data_cv: requirements
$(PYTHON_INTERPRETER) scripts/process_data_cv.py $(SCAFFOLD_SPLIT_FLAG)
# MPNN 支持:使用 USE_MPNN=1 启用 MPNN encoder
# 例如make pretrain USE_MPNN=1
MPNN_FLAG = $(if $(USE_MPNN),--use-mpnn,)
@ -81,40 +99,185 @@ MPNN_FLAG = $(if $(USE_MPNN),--use-mpnn,)
# 例如make finetune FREEZE_BACKBONE=1
FREEZE_FLAG = $(if $(FREEZE_BACKBONE),--freeze-backbone,)
# 设备选择:使用 DEVICE=xxx 指定设备
# 例如make train DEVICE=cuda:0 或 make test_cv DEVICE=mps
DEVICE_FLAG = $(if $(DEVICE),--device $(DEVICE),)
## Pretrain on external data (delivery only)
.PHONY: pretrain
pretrain: requirements
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.pretrain main $(MPNN_FLAG)
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.pretrain main $(MPNN_FLAG) $(DEVICE_FLAG)
## Evaluate pretrain model (delivery metrics)
.PHONY: test_pretrain
test_pretrain: requirements
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.pretrain test $(MPNN_FLAG)
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.pretrain test $(MPNN_FLAG) $(DEVICE_FLAG)
## Pretrain with cross-validation (5-fold)
.PHONY: pretrain_cv
pretrain_cv: requirements
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.pretrain_cv main $(MPNN_FLAG) $(DEVICE_FLAG)
## Evaluate CV pretrain models on test sets (auto-detects MPNN from checkpoint)
.PHONY: test_pretrain_cv
test_pretrain_cv: requirements
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.pretrain_cv test $(DEVICE_FLAG)
## Train model (multi-task, from scratch)
.PHONY: train
train: requirements
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.train $(MPNN_FLAG)
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.train $(MPNN_FLAG) $(DEVICE_FLAG)
## Finetune from pretrained checkpoint (use FREEZE_BACKBONE=1 to freeze backbone)
.PHONY: finetune
finetune: requirements
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.train --init-from-pretrain models/pretrain_delivery.pt $(FREEZE_FLAG) $(MPNN_FLAG)
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.train --init-from-pretrain models/pretrain_delivery.pt $(FREEZE_FLAG) $(MPNN_FLAG) $(DEVICE_FLAG)
## Final training using all data (train:val=9:1, no test set), with pretrained weights
.PHONY: train_final
train_final: requirements
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.train \
--train-path data/processed/final/train.parquet \
--val-path data/processed/final/val.parquet \
--output-dir models/final \
--init-from-pretrain models/pretrain_delivery.pt \
$(FREEZE_FLAG) $(MPNN_FLAG) $(DEVICE_FLAG)
## Finetune with cross-validation on internal data (5-fold, amine-based split) with pretrained weights
.PHONY: finetune_cv
finetune_cv: requirements
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.train_cv main --init-from-pretrain models/pretrain_delivery.pt $(FREEZE_FLAG) $(MPNN_FLAG) $(DEVICE_FLAG)
## Train with cross-validation on internal data only (5-fold, amine-based split)
.PHONY: train_cv
train_cv: requirements
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.train_cv main $(FREEZE_FLAG) $(MPNN_FLAG) $(DEVICE_FLAG)
## Evaluate CV finetuned models on test sets (auto-detects MPNN from checkpoint)
.PHONY: test_cv
test_cv: requirements
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.train_cv test $(DEVICE_FLAG)
## Train with hyperparameter tuning
.PHONY: tune
tune: requirements
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.train --tune $(MPNN_FLAG)
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.train --tune $(MPNN_FLAG) $(DEVICE_FLAG)
# ============ 嵌套 CV + Optuna 调参StratifiedKFold + 类权重) ============
# 通用参数:
# SEED: 随机种子 (默认: 42)
# N_TRIALS: Optuna 试验数 (默认: 20)
# EPOCHS_PER_TRIAL: 每个试验的最大 epoch (默认: 30)
# MIN_STRATUM_COUNT: 复合分层标签的最小样本数 (默认: 5)
# OUTPUT_DIR: 输出目录 (根据命令有不同默认值)
# INIT_PRETRAIN: 预训练权重路径 (默认: models/pretrain_delivery.pt)
SEED_FLAG = $(if $(SEED),--seed $(SEED),)
N_TRIALS_FLAG = $(if $(N_TRIALS),--n-trials $(N_TRIALS),)
EPOCHS_PER_TRIAL_FLAG = $(if $(EPOCHS_PER_TRIAL),--epochs-per-trial $(EPOCHS_PER_TRIAL),)
MIN_STRATUM_FLAG = $(if $(MIN_STRATUM_COUNT),--min-stratum-count $(MIN_STRATUM_COUNT),)
OUTPUT_DIR_FLAG = $(if $(OUTPUT_DIR),--output-dir $(OUTPUT_DIR),)
USE_SWA_FLAG = $(if $(USE_SWA),--use-swa,)
# 默认使用预训练权重,设置 NO_PRETRAIN=1 可禁用
INIT_PRETRAIN_FLAG = $(if $(NO_PRETRAIN),,--init-from-pretrain $(or $(INIT_PRETRAIN),models/pretrain_delivery.pt))
## Nested CV with Optuna: outer 5-fold (test) + inner 3-fold (tune)
## 用于模型评估:外层 5-fold 产生无偏性能估计,内层 3-fold 做超参搜索
## 默认加载 models/pretrain_delivery.pt 预训练权重,使用 NO_PRETRAIN=1 禁用
## 使用示例: make nested_cv_tune DEVICE=cuda N_TRIALS=30
.PHONY: nested_cv_tune
nested_cv_tune: requirements
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.nested_cv_optuna \
$(DEVICE_FLAG) $(MPNN_FLAG) $(SEED_FLAG) $(INIT_PRETRAIN_FLAG) \
$(N_TRIALS_FLAG) $(EPOCHS_PER_TRIAL_FLAG) $(MIN_STRATUM_FLAG) $(OUTPUT_DIR_FLAG)
## Final training with Optuna: 3-fold CV tune + full data train
## 用于最终模型训练3-fold 调参后用全量数据训练(无 early-stop
## 默认加载 models/pretrain_delivery.pt 预训练权重,使用 NO_PRETRAIN=1 禁用
## 使用示例: make final_optuna DEVICE=cuda N_TRIALS=30 USE_SWA=1
.PHONY: final_optuna
final_optuna: requirements
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.final_train_optuna_cv \
$(DEVICE_FLAG) $(MPNN_FLAG) $(SEED_FLAG) $(INIT_PRETRAIN_FLAG) \
$(N_TRIALS_FLAG) $(EPOCHS_PER_TRIAL_FLAG) $(MIN_STRATUM_FLAG) $(OUTPUT_DIR_FLAG) $(USE_SWA_FLAG)
## Run predictions
.PHONY: predict
predict: requirements
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.predict
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.predict $(DEVICE_FLAG)
## Test model on test set (with detailed metrics)
## Test model on test set (with detailed metrics, auto-detects MPNN from checkpoint)
.PHONY: test
test: requirements
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.predict test
$(PYTHON_INTERPRETER) -m lnp_ml.modeling.predict test $(DEVICE_FLAG)
## Formulation optimization: find optimal LNP formulation for target organ
## Usage: make optimize SMILES="CC(C)..." ORGAN=liver
.PHONY: optimize
optimize: requirements
$(PYTHON_INTERPRETER) -m app.optimize --smiles "$(SMILES)" --organ $(ORGAN) $(DEVICE_FLAG)
## Start FastAPI backend server (port 8000)
.PHONY: api
api: requirements
uvicorn app.api:app --host 0.0.0.0 --port 8000 --reload
## Start Streamlit frontend app (port 8501)
.PHONY: webapp
webapp: requirements
streamlit run app/app.py --server.port 8501
## Start both API and webapp (run in separate terminals)
.PHONY: serve
serve:
@echo "请在两个终端分别运行:"
@echo " 终端 1: make api"
@echo " 终端 2: make webapp"
@echo ""
@echo "然后访问: http://localhost:8501"
#################################################################################
# DOCKER COMMANDS #
#################################################################################
## Build Docker images
.PHONY: docker-build
docker-build:
docker compose build
## Start all services with Docker Compose
.PHONY: docker-up
docker-up:
docker compose up -d
## Stop all Docker services
.PHONY: docker-down
docker-down:
docker compose down
## View Docker logs
.PHONY: docker-logs
docker-logs:
docker compose logs -f
## Build and start all services
.PHONY: docker-serve
docker-serve: docker-build docker-up
@echo ""
@echo "🚀 服务已启动!"
@echo " - API: http://localhost:8000"
@echo " - Web 应用: http://localhost:8501"
@echo ""
@echo "查看日志: make docker-logs"
@echo "停止服务: make docker-down"
## Clean Docker resources (images, volumes, etc.)
.PHONY: docker-clean
docker-clean:
docker compose down -v --rmi local
docker system prune -f
#################################################################################

View File

@ -156,5 +156,12 @@ python -m lnp_ml.modeling.train \
└── plots.py <- Code to create visualizations
```
### 配方筛选
```
make optimize SMILES="CC(C)NCCNC(C)C" ORGAN=liver
```
--------

62
app/PARAM.md Normal file
View File

@ -0,0 +1,62 @@
## Possible Values
# comp token([B, 5], the sum of the latter four ratios is always 1)
Cationic_Lipid_to_mRNA_weight_ratio(float, Min: 0.05, Max: 0.3, Step Size: 0.01)
Cationic_Lipid_Mol_Ratio(float, Min: 0.05, Max: 0.8, Step Size: 0.01)
Phospholipid_Mol_Ratio(float, Min: 0, Max: 0.8, Step Size: 0.01)
Cholesterol_Mol_Ratio(float, Min: 0, Max: 0.8, Step Size: 0.01)
PEG_Lipid_Mol_Ratio(float, Min: 0, Max: 0.05, Step Size: 0.01)
# phys token([B, 12])
Purity_Pure(one-hot for Purity, always Pure)
Purity_Crude(one-hot for Purity, always Pure)
Mix_type_Microfluidic(one-hot for Mix_type, always Microfluidic)
Mix_type_Microfluidic(one-hot for Mix_type, always Microfluidic)
Cargo_type_mRNA(one-hot for Cargo_type, always mRNA)
Cargo_type_pDNA(one-hot for Cargo_type, always mRNA)
Cargo_type_siRNA(one-hot for Cargo_type, always mRNA)
Target_or_delivered_gene_FFL(one-hot for Target_or_delivered_gene, always FFL)
Target_or_delivered_gene_Peptide_barcode(one-hot for Target_or_delivered_gene, always FFL)
Target_or_delivered_gene_hEPO(one-hot for Target_or_delivered_gene, always FFL)
Target_or_delivered_gene_FVII(one-hot for Target_or_delivered_gene, always FFL)
Target_or_delivered_gene_GFP(one-hot for Target_or_delivered_gene, always FFL)
# help token([B, 4])
Helper_lipid_ID_DOPE(one-hot for Helper_lipid_ID, one of {DOPE, DSPC, DOTAP})
Helper_lipid_ID_DOTAP(one-hot for Helper_lipid_ID, one of {DOPE, DSPC, DOTAP})
Helper_lipid_ID_DSPC(one-hot for Helper_lipid_ID, one of {DOPE, DSPC, DOTAP})
Helper_lipid_ID_MDOA(one-hot for Helper_lipid_ID, one of {DOPE, DSPC, DOTAP})
# exp token([B, 32])
Model_type_A549(one-hot for Model_type, always Mouse)
Model_type_BDMC(one-hot for Model_type, always Mouse)
Model_type_BMDM(one-hot for Model_type, always Mouse)
Model_type_HBEC_ALI(one-hot for Model_type, always Mouse)
Model_type_HEK293T(one-hot for Model_type, always Mouse)
Model_type_HeLa(one-hot for Model_type, always Mouse)
Model_type_IGROV1(one-hot for Model_type, always Mouse)
Model_type_Mouse(one-hot for Model_type, always Mouse)
Model_type_RAW264p7(one-hot for Model_type, always Mouse)
Delivery_target_dendritic_cell(one-hot for Delivery_target, always body)
Delivery_target_generic_cell(one-hot for Delivery_target, always body)
Delivery_target_liver(one-hot for Delivery_target, always body)
Delivery_target_lung(one-hot for Delivery_target, always body)
Delivery_target_lung_epithelium(one-hot for Delivery_target, always body)
Delivery_target_macrophage(one-hot for Delivery_target, always body)
Delivery_target_muscle(one-hot for Delivery_target, always body)
Delivery_target_spleen(one-hot for Delivery_target, always body)
Delivery_target_body(one-hot for Delivery_target, always body)
Route_of_administration_in_vitro(one-hot for Route_of_administration, one of {Intravenous, Intramuscular})
Route_of_administration_intravenous(one-hot for Route_of_administration, one of {Intravenous, Intramuscular})
Route_of_administration_intramuscular(one-hot for Route_of_administration, one of {Intravenous, Intramuscular})
Route_of_administration_intratracheal(one-hot for Route_of_administration, one of {Intravenous, Intramuscular})
Sample_organization_type_individual(one-hot for Sample_organization_type, always Individual)
Sample_organization_type_barcoded(one-hot for Sample_organization_type, always Individual)
Value_name_log_luminescence(one-hot for Value_name, always luminescence)
Value_name_luminescence(one-hot for Value_name, always luminescence)
Value_name_FFL_silencing(one-hot for Value_name, always luminescence)
Value_name_Peptide_abundance(one-hot for Value_name, always luminescence)
Value_name_hEPO(one-hot for Value_name, always luminescence)
Value_name_FVII_silencing(one-hot for Value_name, always luminescence)
Value_name_GFP_delivery(one-hot for Value_name, always luminescence)
Value_name_Discretized_luminescence(one-hot for Value_name, always luminescence)

15
app/SCORE.md Normal file
View File

@ -0,0 +1,15 @@
## regression
biodistribution(selected organ only): score = y * weight, where weight=0.3
quantified_delivery: score = (y-min)/(max-min)*weight, where weight=0.25, (min=-0.798559291, max=4.497814051056962) when route_of_administration=intravenous, (min=-0.794912427, max=10.220042980012716) when route_of_administration=intramuscular
size: score = 0 * weight if y<60, 1 * weight if 60<=y<=150, 0 * weight if y>150, where weight=0.05
## classification
encapsulation_efficiency_0: score = weight, where weight=0
encapsulation_efficiency_1: score = weight, where weight=0.02
encapsulation_efficiency_2: score = weight, where weight=0.08
pdi_0: score = weight, where weight=0.08
pdi_1: score = weight, where weight=0.02
pdi_2: score = weight, where weight=0
pdi_3: score = weight, where weight=0
toxicity_0: score=weight, where weight=0.2
toxicity_1: score=weight, where weight=0

2
app/__init__.py Normal file
View File

@ -0,0 +1,2 @@
"""LNP 配方优化应用"""

361
app/api.py Normal file
View File

@ -0,0 +1,361 @@
"""
FastAPI 配方优化 API
启动服务:
uvicorn app.api:app --host 0.0.0.0 --port 8000 --reload
"""
import os
from pathlib import Path
from typing import List, Dict, Optional
from contextlib import asynccontextmanager
import torch
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field
from loguru import logger
from lnp_ml.config import MODELS_DIR
from lnp_ml.modeling.predict import load_model
from app.optimize import (
optimize,
format_results,
AVAILABLE_ORGANS,
TARGET_BIODIST,
CompRanges,
ScoringWeights,
)
# ============ Pydantic Models ============
class CompRangesRequest(BaseModel):
    """组分范围配置"""
    # Min/max search bounds for each formulation component. The ge/le
    # constraints only bound each field individually; cross-field consistency
    # (e.g. min <= max) is checked later via CompRanges.get_validation_error()
    # in the /optimize handler.
    weight_ratio_min: float = Field(default=0.05, ge=0.01, le=0.50, description="阳离子脂质/mRNA 重量比最小值")
    weight_ratio_max: float = Field(default=0.30, ge=0.01, le=0.50, description="阳离子脂质/mRNA 重量比最大值")
    cationic_mol_min: float = Field(default=0.05, ge=0.00, le=1.00, description="阳离子脂质 mol 比例最小值")
    cationic_mol_max: float = Field(default=0.80, ge=0.00, le=1.00, description="阳离子脂质 mol 比例最大值")
    phospholipid_mol_min: float = Field(default=0.00, ge=0.00, le=1.00, description="磷脂 mol 比例最小值")
    phospholipid_mol_max: float = Field(default=0.80, ge=0.00, le=1.00, description="磷脂 mol 比例最大值")
    cholesterol_mol_min: float = Field(default=0.00, ge=0.00, le=1.00, description="胆固醇 mol 比例最小值")
    cholesterol_mol_max: float = Field(default=0.80, ge=0.00, le=1.00, description="胆固醇 mol 比例最大值")
    peg_mol_min: float = Field(default=0.00, ge=0.00, le=0.20, description="PEG 脂质 mol 比例最小值")
    peg_mol_max: float = Field(default=0.05, ge=0.00, le=0.20, description="PEG 脂质 mol 比例最大值")

    def to_comp_ranges(self) -> CompRanges:
        """转换为 CompRanges 对象"""
        # Field-for-field copy into the internal CompRanges object used by the optimizer.
        return CompRanges(
            weight_ratio_min=self.weight_ratio_min,
            weight_ratio_max=self.weight_ratio_max,
            cationic_mol_min=self.cationic_mol_min,
            cationic_mol_max=self.cationic_mol_max,
            phospholipid_mol_min=self.phospholipid_mol_min,
            phospholipid_mol_max=self.phospholipid_mol_max,
            cholesterol_mol_min=self.cholesterol_mol_min,
            cholesterol_mol_max=self.cholesterol_mol_max,
            peg_mol_min=self.peg_mol_min,
            peg_mol_max=self.peg_mol_max,
        )
class ScoringWeightsRequest(BaseModel):
    """评分权重配置"""
    # Weights for the composite formulation score; the defaults rank purely by
    # target-organ biodistribution (every other weight is zero).
    biodist_weight: float = Field(default=1.0, ge=0.0, description="目标器官分布权重")
    delivery_weight: float = Field(default=0.0, ge=0.0, description="量化递送权重")
    # NOTE(review): description says 80-150nm, but app/SCORE.md scores sizes in
    # the 60-150nm band — confirm which range the scorer actually uses.
    size_weight: float = Field(default=0.0, ge=0.0, description="粒径权重 (80-150nm)")
    ee_class_weights: List[float] = Field(default=[0.0, 0.0, 0.0], description="EE 分类权重 [class0, class1, class2]")
    pdi_class_weights: List[float] = Field(default=[0.0, 0.0, 0.0, 0.0], description="PDI 分类权重 [class0, class1, class2, class3]")
    toxic_class_weights: List[float] = Field(default=[0.0, 0.0], description="毒性分类权重 [无毒, 有毒]")

    def to_scoring_weights(self) -> ScoringWeights:
        """转换为 ScoringWeights 对象"""
        # Field-for-field copy into the internal ScoringWeights object.
        return ScoringWeights(
            biodist_weight=self.biodist_weight,
            delivery_weight=self.delivery_weight,
            size_weight=self.size_weight,
            ee_class_weights=self.ee_class_weights,
            pdi_class_weights=self.pdi_class_weights,
            toxic_class_weights=self.toxic_class_weights,
        )
class OptimizeRequest(BaseModel):
    """优化请求"""
    # Required inputs.
    smiles: str = Field(..., description="Cationic lipid SMILES string")
    organ: str = Field(..., description="Target organ for optimization")
    # Search-strategy knobs; None means "let the optimizer apply its default".
    top_k: int = Field(default=20, ge=1, le=100, description="Number of top formulations to return")
    num_seeds: Optional[int] = Field(default=None, ge=1, le=500, description="Number of seed points from first iteration (default: top_k * 5)")
    top_per_seed: int = Field(default=1, ge=1, le=10, description="Number of local best to keep per seed in refinement")
    step_sizes: Optional[List[float]] = Field(default=None, description="Step sizes for each iteration (default: [0.10, 0.02, 0.01])")
    comp_ranges: Optional[CompRangesRequest] = Field(default=None, description="组分范围配置(默认使用标准范围)")
    routes: Optional[List[str]] = Field(default=None, description="给药途径列表 (default: ['intravenous', 'intramuscular'])")
    scoring_weights: Optional[ScoringWeightsRequest] = Field(default=None, description="评分权重配置(默认仅按 biodist 排序)")

    class Config:
        # Example payload surfaced in the OpenAPI /docs UI.
        json_schema_extra = {
            "example": {
                "smiles": "CC(C)NCCNC(C)C",
                "organ": "liver",
                "top_k": 20,
                "num_seeds": None,
                "top_per_seed": 1,
                "step_sizes": None,
                "comp_ranges": None,
                "routes": None,
                "scoring_weights": None
            }
        }
class FormulationResult(BaseModel):
    """单个配方结果"""
    rank: int  # 1-based position in the ranked result list
    target_biodist: float  # predicted biodistribution for the requested organ
    composite_score: Optional[float] = None  # weighted composite score
    # Formulation composition.
    cationic_lipid_to_mrna_ratio: float
    cationic_lipid_mol_ratio: float
    phospholipid_mol_ratio: float
    cholesterol_mol_ratio: float
    peg_lipid_mol_ratio: float
    helper_lipid: str
    route: str
    # Predicted biodistribution for every organ, keyed by organ name.
    all_biodist: Dict[str, float]
    # Optional auxiliary predictions.
    quantified_delivery: Optional[float] = None
    size: Optional[float] = None
    pdi_class: Optional[int] = None  # PDI class (0: <0.2, 1: 0.2-0.3, 2: 0.3-0.4, 3: >0.4)
    # NOTE(review): EE thresholds here (<80%, 80-90%, >90%) disagree with
    # app/app.py's EE_CLASS_LABELS (<50%, 50-80%, >80%) — confirm the model's binning.
    ee_class: Optional[int] = None  # EE class (0: <80%, 1: 80-90%, 2: >90%)
    toxic_class: Optional[int] = None  # toxicity class (0: non-toxic, 1: toxic)
class OptimizeResponse(BaseModel):
    """优化响应"""
    smiles: str  # the SMILES echoed back from the request
    target_organ: str  # the organ that was optimized for
    formulations: List[FormulationResult]  # ranked best-first
    message: str  # human-readable summary
class HealthResponse(BaseModel):
    """健康检查响应"""
    status: str  # "healthy" or "model_not_loaded"
    model_loaded: bool
    device: str  # string form of the torch device in use
    available_organs: List[str]
# ============ Global State ============
class ModelState:
    """模型状态管理"""
    # Class-level slots populated once by the lifespan hook at startup.
    model = None  # loaded model, None until startup completes
    device = None  # torch.device used for inference
    model_path = None  # Path the model was loaded from

# Module-level singleton shared by all request handlers.
state = ModelState()
# ============ Lifespan ============
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan: load the model at startup, release it at shutdown.

    Device resolution order is CUDA -> Apple MPS -> CPU, overridable with the
    DEVICE environment variable. The model path defaults to
    MODELS_DIR/final/model.pt and can be overridden with MODEL_PATH.
    Raises whatever load_model raises so a broken deployment fails fast.
    """
    # ---- Startup ----
    logger.info("Starting API server...")
    # Pick the best available accelerator.
    if torch.cuda.is_available():
        device_str = "cuda"
    elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
        device_str = "mps"
    else:
        device_str = "cpu"
    # Environment variable takes precedence over auto-detection.
    device_str = os.environ.get("DEVICE", device_str)
    state.device = torch.device(device_str)
    logger.info(f"Using device: {state.device}")
    # Load the model once into process-global state.
    model_path = Path(os.environ.get("MODEL_PATH", MODELS_DIR / "final" / "model.pt"))
    state.model_path = model_path
    logger.info(f"Loading model from {model_path}...")
    try:
        state.model = load_model(model_path, state.device)
        logger.success("Model loaded successfully!")
    except Exception as e:
        # Fail fast: without a model the API cannot serve requests.
        logger.error(f"Failed to load model: {e}")
        raise
    yield
    # ---- Shutdown ----
    logger.info("Shutting down API server...")
    state.model = None
    # Fix: the original used a conditional *expression* purely for its side
    # effect (`torch.cuda.empty_cache() if torch.cuda.is_available() else None`);
    # a plain guarded statement is the idiomatic form.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
# ============ FastAPI App ============
app = FastAPI(
    title="LNP 配方优化 API",
    description="基于深度学习的 LNP 纳米颗粒配方优化服务",
    version="1.0.0",
    lifespan=lifespan,
)
# CORS: wide open for development convenience.
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is
# rejected by browsers for credentialed requests and is unsafe in production —
# consider restricting origins to the web-app host.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# ============ Endpoints ============
@app.get("/", response_model=HealthResponse)
async def health_check():
    """健康检查"""
    # Report model readiness, the active device, and the selectable organs.
    return HealthResponse(
        status="healthy" if state.model is not None else "model_not_loaded",
        model_loaded=state.model is not None,
        device=str(state.device),
        available_organs=AVAILABLE_ORGANS,
    )
@app.get("/organs", response_model=List[str])
async def get_available_organs():
    """获取可用的目标器官列表"""
    # Static list imported from app.optimize.
    return AVAILABLE_ORGANS
@app.post("/optimize", response_model=OptimizeResponse)
async def optimize_formulation(request: OptimizeRequest):
    """
    执行配方优化
    通过迭代式 Grid Search 寻找最大化目标器官 Biodistribution 的最优配方
    """
    # Reject requests until the lifespan hook has loaded the model (503).
    if state.model is None:
        raise HTTPException(status_code=503, detail="Model not loaded")
    # Organ must be one of the supported optimization targets.
    if request.organ not in AVAILABLE_ORGANS:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid organ: {request.organ}. Available: {AVAILABLE_ORGANS}"
        )
    # SMILES must be non-empty; chemical validity is checked downstream.
    if not request.smiles or len(request.smiles.strip()) == 0:
        raise HTTPException(status_code=400, detail="SMILES string cannot be empty")
    # Routes, when supplied, must be a non-empty subset of the supported routes.
    valid_routes = ["intravenous", "intramuscular"]
    if request.routes is not None:
        for r in request.routes:
            if r not in valid_routes:
                raise HTTPException(
                    status_code=400,
                    detail=f"Invalid route: {r}. Available: {valid_routes}"
                )
        if len(request.routes) == 0:
            raise HTTPException(status_code=400, detail="At least one route must be specified")
    logger.info(f"Optimization request: organ={request.organ}, routes={request.routes}, smiles={request.smiles[:50]}...")
    # Build the component-range config. Validated OUTSIDE the try block below so
    # invalid user input yields a 400, not a wrapped 500.
    comp_ranges = None
    if request.comp_ranges is not None:
        comp_ranges = request.comp_ranges.to_comp_ranges()
        # Cross-field sanity check (individual bounds are already enforced by pydantic).
        validation_error = comp_ranges.get_validation_error()
        if validation_error:
            raise HTTPException(
                status_code=400,
                detail=f"组分范围配置无效: {validation_error}"
            )
    # Optional scoring-weight overrides (default ranks by biodistribution only).
    scoring_weights = None
    if request.scoring_weights is not None:
        scoring_weights = request.scoring_weights.to_scoring_weights()
    try:
        # Run the hierarchical (coarse-to-fine) grid search.
        results = optimize(
            smiles=request.smiles,
            organ=request.organ,
            model=state.model,
            device=state.device,
            top_k=request.top_k,
            num_seeds=request.num_seeds,
            top_per_seed=request.top_per_seed,
            step_sizes=request.step_sizes,
            comp_ranges=comp_ranges,
            routes=request.routes,
            scoring_weights=scoring_weights,
            batch_size=256,
        )
        # Weights used to report the composite score on each result row.
        from app.optimize import compute_formulation_score, DEFAULT_SCORING_WEIGHTS
        actual_scoring_weights = scoring_weights if scoring_weights is not None else DEFAULT_SCORING_WEIGHTS
        # Convert internal result objects into the response schema.
        formulations = []
        for i, f in enumerate(results):
            formulations.append(FormulationResult(
                rank=i + 1,
                target_biodist=f.get_biodist(request.organ),
                composite_score=compute_formulation_score(f, request.organ, actual_scoring_weights),
                cationic_lipid_to_mrna_ratio=f.cationic_lipid_to_mrna_ratio,
                cationic_lipid_mol_ratio=f.cationic_lipid_mol_ratio,
                phospholipid_mol_ratio=f.phospholipid_mol_ratio,
                cholesterol_mol_ratio=f.cholesterol_mol_ratio,
                peg_lipid_mol_ratio=f.peg_lipid_mol_ratio,
                helper_lipid=f.helper_lipid,
                route=f.route,
                # Strip the "Biodistribution_" column prefix for the response keys.
                all_biodist={
                    col.replace("Biodistribution_", ""): f.biodist_predictions.get(col, 0.0)
                    for col in TARGET_BIODIST
                },
                # Auxiliary predictions.
                quantified_delivery=f.quantified_delivery,
                size=f.size,
                pdi_class=f.pdi_class,
                ee_class=f.ee_class,
                toxic_class=f.toxic_class,
            ))
        logger.success(f"Optimization completed: {len(formulations)} formulations")
        return OptimizeResponse(
            smiles=request.smiles,
            target_organ=request.organ,
            formulations=formulations,
            message=f"Successfully found top {len(formulations)} formulations for {request.organ}",
        )
    except Exception as e:
        # Any failure inside the search is surfaced as a 500 with its message.
        logger.error(f"Optimization failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
if __name__ == "__main__":
    # Dev entry point; production runs via the uvicorn CLI / Docker CMD instead.
    import uvicorn

    uvicorn.run(
        "app.api:app",
        host="0.0.0.0",
        port=8000,
        reload=True,
    )

770
app/app.py Normal file
View File

@ -0,0 +1,770 @@
"""
Streamlit 配方优化交互界面
启动应用:
streamlit run app/app.py
Docker 环境变量:
API_URL: API 服务地址 (默认: http://localhost:8000)
"""
import io
import os
from datetime import datetime
import httpx
import pandas as pd
import streamlit as st
# ============ Configuration ============
# Backend API base URL; read from the environment so the Docker setup can
# point the UI at the api container.
API_URL = os.environ.get("API_URL", "http://localhost:8000")
# Organs selectable in the UI.
# NOTE(review): duplicated here instead of fetched from the API's /organs
# endpoint — keep the two lists in sync.
AVAILABLE_ORGANS = [
    "liver",
    "spleen",
    "lung",
    "heart",
    "kidney",
    "muscle",
    "lymph_nodes",
]
# Display labels (Chinese + English) keyed by organ id.
ORGAN_LABELS = {
    "liver": "肝脏 (Liver)",
    "spleen": "脾脏 (Spleen)",
    "lung": "肺 (Lung)",
    "heart": "心脏 (Heart)",
    "kidney": "肾脏 (Kidney)",
    "muscle": "肌肉 (Muscle)",
    "lymph_nodes": "淋巴结 (Lymph Nodes)",
}
# Administration routes offered in the UI.
AVAILABLE_ROUTES = [
    "intravenous",
    "intramuscular",
]
# Display labels keyed by route id.
ROUTE_LABELS = {
    "intravenous": "静脉注射 (Intravenous)",
    "intramuscular": "肌肉注射 (Intramuscular)",
}
# ============ Page configuration ============
st.set_page_config(
    page_title="LNP 配方优化",
    page_icon="🧬",
    layout="wide",
    initial_sidebar_state="expanded",
)
# ============ 自定义样式 ============
st.markdown("""
<style>
/* 主标题样式 */
.main-title {
font-size: 2.5rem;
font-weight: 700;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
text-align: center;
margin-bottom: 0.5rem;
}
/* 副标题样式 */
.sub-title {
font-size: 1.1rem;
color: #6c757d;
text-align: center;
margin-bottom: 2rem;
}
/* 结果卡片 */
.result-card {
background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
border-radius: 12px;
padding: 1.5rem;
margin-bottom: 1rem;
}
/* 指标高亮 */
.metric-highlight {
font-size: 2rem;
font-weight: 700;
color: #667eea;
}
/* 侧边栏样式 */
.sidebar-section {
background: #f8f9fa;
border-radius: 8px;
padding: 1rem;
margin-bottom: 1rem;
}
/* 状态指示器 */
.status-online {
color: #28a745;
font-weight: 600;
}
.status-offline {
color: #dc3545;
font-weight: 600;
}
/* 表格样式优化 */
.dataframe {
font-size: 0.85rem;
}
</style>
""", unsafe_allow_html=True)
# ============ 辅助函数 ============
def check_api_status() -> bool:
    """Return True if the backend answers GET {API_URL}/ with HTTP 200.

    Any failure (connection error, timeout, etc.) is treated as "offline"
    and returns False.
    """
    try:
        with httpx.Client(timeout=5) as client:
            response = client.get(f"{API_URL}/")
            return response.status_code == 200
    except Exception:
        # Fix: the original bare `except:` also swallowed KeyboardInterrupt and
        # SystemExit; catching Exception keeps Ctrl-C / shutdown working.
        return False
def call_optimize_api(
    smiles: str,
    organ: str,
    top_k: int = 20,
    num_seeds: int = None,
    top_per_seed: int = 1,
    step_sizes: list = None,
    comp_ranges: dict = None,
    routes: list = None,
    scoring_weights: dict = None,
) -> dict:
    """调用优化 API

    POSTs the parameters (mirroring the API's OptimizeRequest schema) to
    {API_URL}/optimize and returns the decoded JSON body. None values let the
    backend apply its own defaults. Raises httpx.HTTPStatusError on a non-2xx
    response (via raise_for_status).
    """
    payload = {
        "smiles": smiles,
        "organ": organ,
        "top_k": top_k,
        "num_seeds": num_seeds,
        "top_per_seed": top_per_seed,
        "step_sizes": step_sizes,
        "comp_ranges": comp_ranges,
        "routes": routes,
        "scoring_weights": scoring_weights,
    }
    # 10-minute timeout: custom parameter combinations may need a long search.
    with httpx.Client(timeout=600) as client:
        response = client.post(
            f"{API_URL}/optimize",
            json=payload,
        )
        response.raise_for_status()
        return response.json()
# PDI class labels shown in the results table.
PDI_CLASS_LABELS = {
    0: "<0.2 (优)",
    1: "0.2-0.3 (良)",
    2: "0.3-0.4 (中)",
    3: ">0.4 (差)",
}
# Encapsulation-efficiency class labels.
# NOTE(review): these thresholds (<50%, 50-80%, >80%) disagree with the comment
# in app/api.py (<80%, 80-90%, >90%) — confirm which binning the model uses.
EE_CLASS_LABELS = {
    0: "<50% (低)",
    1: "50-80% (中)",
    2: ">80% (高)",
}
# Toxicity class labels.
TOXIC_CLASS_LABELS = {
    0: "无毒 ✓",
    1: "有毒 ⚠",
}
def format_results_dataframe(results: dict, smiles_label: str = None) -> pd.DataFrame:
    """Convert an /optimize API response into a display-ready DataFrame.

    One row per formulation, in API rank order. Optional fields
    (composite_score, quantified_delivery, size, class predictions) only get a
    column when present; when smiles_label is given it becomes the first column.
    """
    target_organ = results["target_organ"]
    records = []
    for item in results["formulations"]:
        record = {}
        # SMILES label first, when provided.
        if smiles_label:
            record["SMILES"] = smiles_label
        record["排名"] = item["rank"]
        # Composite score directly after the rank, when present.
        score = item.get("composite_score")
        if score is not None:
            record["综合评分"] = f"{score:.4f}"
        # Target-organ biodistribution plus the formulation composition.
        record[f"{target_organ}分布"] = f"{item['target_biodist'] * 100:.8f}%"
        record["阳离子脂质/mRNA比例"] = item["cationic_lipid_to_mrna_ratio"]
        record["阳离子脂质(mol)比例"] = item["cationic_lipid_mol_ratio"]
        record["磷脂(mol)比例"] = item["phospholipid_mol_ratio"]
        record["胆固醇(mol)比例"] = item["cholesterol_mol_ratio"]
        record["PEG脂质(mol)比例"] = item["peg_lipid_mol_ratio"]
        record["辅助脂质"] = item["helper_lipid"]
        record["给药途径"] = item["route"]
        # Auxiliary predictions, each only when present.
        delivery = item.get("quantified_delivery")
        if delivery is not None:
            record["量化递送"] = f"{delivery:.4f}"
        particle_size = item.get("size")
        if particle_size is not None:
            record["粒径(nm)"] = f"{particle_size:.1f}"
        if item.get("pdi_class") is not None:
            record["PDI"] = PDI_CLASS_LABELS.get(item["pdi_class"], str(item["pdi_class"]))
        if item.get("ee_class") is not None:
            record["包封率"] = EE_CLASS_LABELS.get(item["ee_class"], str(item["ee_class"]))
        if item.get("toxic_class") is not None:
            record["毒性"] = TOXIC_CLASS_LABELS.get(item["toxic_class"], str(item["toxic_class"]))
        # Biodistribution for every organ other than the target.
        for organ_name, fraction in item["all_biodist"].items():
            if organ_name != target_organ:
                record[f"{organ_name}分布"] = f"{fraction * 100:.2f}%"
        records.append(record)
    return pd.DataFrame(records)
def create_export_csv(df: pd.DataFrame, smiles: str, organ: str) -> str:
    """Build the downloadable CSV payload for the results table.

    A commented metadata header (SMILES, target organ, export timestamp) is
    prepended, followed by a blank line and the table serialized without the index.
    """
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    header = (
        f"# LNP 配方优化结果\n"
        f"# SMILES: {smiles}\n"
        f"# 目标器官: {organ}\n"
        f"# 导出时间: {timestamp}\n\n"
    )
    return header + df.to_csv(index=False)
# ============ Main UI ============
def main():
    """Render the Streamlit single-page app.

    Flow: sidebar collects the SMILES input, target organ, routes and
    advanced search/scoring knobs; pressing the button calls the
    optimisation API once per SMILES; results are cached in
    ``st.session_state`` (so they survive Streamlit reruns) and rendered
    as metrics, a table, and a CSV download.
    """
    # Page title
    st.markdown('<h1 class="main-title">🧬 LNP 配方优化系统</h1>', unsafe_allow_html=True)
    st.markdown('<p class="sub-title">基于深度学习的脂质纳米颗粒配方智能优选</p>', unsafe_allow_html=True)
    # Probe the backend once per rerun; drives the status badge and
    # whether the launch button is enabled.
    api_online = check_api_status()
    # ========== Sidebar ==========
    with st.sidebar:
        # st.header("⚙️ 参数设置")
        # API status indicator
        if api_online:
            st.success("🟢 API 服务在线")
        else:
            st.error("🔴 API 服务离线")
            st.info("请先启动 API 服务:\n```\nuvicorn app.api:app --port 8000\n```")
        # st.divider()
        # SMILES input (comma-separated for batch mode)
        st.subheader("🔬 分子结构")
        smiles_input = st.text_area(
            "输入阳离子脂质 SMILES",
            value="",
            height=100,
            placeholder="例如: CC(C)NCCNC(C)C\n多条SMILES用英文逗号分隔: SMI1,SMI2,SMI3",
            help="输入阳离子脂质的 SMILES 字符串。支持多条 SMILES用英文逗号 (,) 分隔",
        )
        # Example SMILES shortcuts (currently disabled)
        # with st.expander("📋 示例 SMILES"):
        #     example_smiles = {
        #         "DLin-MC3-DMA": "CC(C)=CCCC(C)=CCCC(C)=CCN(C)CCCCCCCCOC(=O)CCCCCCC/C=C\\CCCCCCCC",
        #         "简单胺": "CC(C)NCCNC(C)C",
        #         "长链胺": "CCCCCCCCCCCCNCCNCCCCCCCCCCCC",
        #     }
        #     for name, smi in example_smiles.items():
        #         if st.button(f"使用 {name}", key=f"example_{name}"):
        #             st.session_state["smiles_input"] = smi
        #             st.rerun()
        # st.divider()
        # Target-organ selection
        st.subheader("🎯 目标器官")
        selected_organ = st.selectbox(
            "选择优化目标器官",
            options=AVAILABLE_ORGANS,
            format_func=lambda x: ORGAN_LABELS.get(x, x),
            index=0,
        )
        # Administration-route selection (multi-select, at least one required)
        st.subheader("💉 给药途径")
        selected_routes = st.multiselect(
            "选择给药途径",
            options=AVAILABLE_ROUTES,
            default=AVAILABLE_ROUTES,
            format_func=lambda x: ROUTE_LABELS.get(x, x),
            help="选择要搜索的给药途径,可多选。至少选择一种。",
        )
        if not selected_routes:
            st.warning("⚠️ 请至少选择一种给药途径")
        # Advanced options
        with st.expander("🔧 高级选项"):
            st.markdown("**输出设置**")
            top_k = st.slider(
                "返回配方数量 (top_k)",
                min_value=5,
                max_value=100,
                value=20,
                step=5,
                help="最终返回的最优配方数量",
            )
            st.markdown("**搜索策略**")
            num_seeds = st.slider(
                "种子点数量 (num_seeds)",
                min_value=10,
                max_value=200,
                value=top_k * 5,
                step=10,
                help="第一轮迭代后保留的种子点数量,更多种子点意味着更广泛的搜索",
            )
            top_per_seed = st.slider(
                "每个种子的局部最优数 (top_per_seed)",
                min_value=1,
                max_value=5,
                value=1,
                step=1,
                help="后续迭代中,每个种子点邻域保留的局部最优数量",
            )
            st.markdown("**迭代步长与轮数**")
            use_custom_steps = st.checkbox(
                "自定义迭代步长",
                value=False,
                help="默认步长为 [0.10, 0.02, 0.01]共3轮逐步精细化搜索。将某轮步长设为0可减少迭代轮数。",
            )
            if use_custom_steps:
                col1, col2, col3 = st.columns(3)
                with col1:
                    step1 = st.number_input(
                        "第1轮步长",
                        min_value=0.01, max_value=0.20, value=0.10,
                        step=0.01, format="%.2f",
                        help="第1轮为全局粗搜索步长必须大于0",
                    )
                with col2:
                    step2 = st.number_input(
                        "第2轮步长",
                        min_value=0.00, max_value=0.10, value=0.02,
                        step=0.01, format="%.2f",
                        help="设为0则只进行1轮搜索",
                    )
                with col3:
                    step3 = st.number_input(
                        "第3轮步长",
                        min_value=0.00, max_value=0.05, value=0.01,
                        step=0.01, format="%.2f",
                    help="设为0则只进行2轮搜索",
                    )
                # Build the actual step_sizes list from the widget values:
                # step2 == 0 -> keep only [step1] (1 round)
                # step3 == 0 -> keep [step1, step2] (2 rounds)
                # otherwise  -> [step1, step2, step3] (3 rounds)
                if step2 == 0.0:
                    step_sizes = [step1]
                elif step3 == 0.0:
                    step_sizes = [step1, step2]
                else:
                    step_sizes = [step1, step2, step3]
                # Show how many rounds will actually run
                st.caption(f"📌 实际迭代轮数: {len(step_sizes)} 轮,步长: {step_sizes}")
            else:
                step_sizes = None  # use server-side defaults
            st.markdown("**组分范围限制**")
            use_custom_ranges = st.checkbox(
                "自定义组分取值范围",
                value=False,
                help="限制各组分的取值范围mol 比例加起来仍为 100%",
            )
            if use_custom_ranges:
                st.caption("阳离子脂质/mRNA 重量比")
                col1, col2 = st.columns(2)
                with col1:
                    weight_ratio_min = st.number_input("最小", min_value=0.01, max_value=0.50, value=0.05, step=0.01, format="%.2f", key="wr_min")
                with col2:
                    weight_ratio_max = st.number_input("最大", min_value=0.01, max_value=0.50, value=0.30, step=0.01, format="%.2f", key="wr_max")
                st.caption("阳离子脂质 mol 比例")
                col1, col2 = st.columns(2)
                with col1:
                    cationic_mol_min = st.number_input("最小", min_value=0.00, max_value=1.00, value=0.05, step=0.05, format="%.2f", key="cat_min")
                with col2:
                    cationic_mol_max = st.number_input("最大", min_value=0.00, max_value=1.00, value=0.80, step=0.05, format="%.2f", key="cat_max")
                st.caption("磷脂 mol 比例")
                col1, col2 = st.columns(2)
                with col1:
                    phospholipid_mol_min = st.number_input("最小", min_value=0.00, max_value=1.00, value=0.00, step=0.05, format="%.2f", key="phos_min")
                with col2:
                    phospholipid_mol_max = st.number_input("最大", min_value=0.00, max_value=1.00, value=0.80, step=0.05, format="%.2f", key="phos_max")
                st.caption("胆固醇 mol 比例")
                col1, col2 = st.columns(2)
                with col1:
                    cholesterol_mol_min = st.number_input("最小", min_value=0.00, max_value=1.00, value=0.00, step=0.05, format="%.2f", key="chol_min")
                with col2:
                    cholesterol_mol_max = st.number_input("最大", min_value=0.00, max_value=1.00, value=0.80, step=0.05, format="%.2f", key="chol_max")
                st.caption("PEG 脂质 mol 比例")
                col1, col2 = st.columns(2)
                with col1:
                    peg_mol_min = st.number_input("最小", min_value=0.00, max_value=0.20, value=0.00, step=0.01, format="%.2f", key="peg_min")
                with col2:
                    peg_mol_max = st.number_input("最大", min_value=0.00, max_value=0.20, value=0.05, step=0.01, format="%.2f", key="peg_max")
                comp_ranges = {
                    "weight_ratio_min": weight_ratio_min,
                    "weight_ratio_max": weight_ratio_max,
                    "cationic_mol_min": cationic_mol_min,
                    "cationic_mol_max": cationic_mol_max,
                    "phospholipid_mol_min": phospholipid_mol_min,
                    "phospholipid_mol_max": phospholipid_mol_max,
                    "cholesterol_mol_min": cholesterol_mol_min,
                    "cholesterol_mol_max": cholesterol_mol_max,
                    "peg_mol_min": peg_mol_min,
                    "peg_mol_max": peg_mol_max,
                }
                # Sanity check: mol fractions must be able to sum to 1.0
                min_sum = cationic_mol_min + phospholipid_mol_min + cholesterol_mol_min + peg_mol_min
                max_sum = cationic_mol_max + phospholipid_mol_max + cholesterol_mol_max + peg_mol_max
                if min_sum > 1.0 or max_sum < 1.0:
                    st.warning("⚠️ 当前范围设置可能无法生成有效配方mol 比例需加起来为 100%")
            else:
                comp_ranges = None  # use server-side defaults
            st.markdown("**评分/排序权重**")
            use_custom_scoring = st.checkbox(
                "自定义评分权重",
                value=False,
                help="默认仅按目标器官分布排序。开启后可自定义多目标加权评分,总分 = 各项score之和。",
            )
            if use_custom_scoring:
                st.caption("**回归任务权重**")
                sw_biodist = st.number_input(
                    "器官分布 (Biodistribution)",
                    min_value=0.00, max_value=10.00, value=0.30,
                    step=0.05, format="%.2f", key="sw_biodist",
                    help="score = biodist_value × weight",
                )
                sw_delivery = st.number_input(
                    "量化递送 (Quantified Delivery)",
                    min_value=0.00, max_value=10.00, value=0.25,
                    step=0.05, format="%.2f", key="sw_delivery",
                    help="score = normalize(delivery, route) × weight",
                )
                sw_size = st.number_input(
                    "粒径 (Size, 80-150nm)",
                    min_value=0.00, max_value=10.00, value=0.05,
                    step=0.05, format="%.2f", key="sw_size",
                    help="score = (1 if 60≤size≤150 else 0) × weight",
                )
                st.caption("**包封率 (EE) 分类权重**")
                col1, col2, col3 = st.columns(3)
                with col1:
                    sw_ee0 = st.number_input("<50% (低)", min_value=0.00, max_value=1.00, value=0.00, step=0.01, format="%.2f", key="sw_ee0")
                with col2:
                    sw_ee1 = st.number_input("50-80% (中)", min_value=0.00, max_value=1.00, value=0.02, step=0.01, format="%.2f", key="sw_ee1")
                with col3:
                    sw_ee2 = st.number_input(">80% (高)", min_value=0.00, max_value=1.00, value=0.08, step=0.01, format="%.2f", key="sw_ee2")
                st.caption("**PDI 分类权重**")
                col1, col2, col3, col4 = st.columns(4)
                with col1:
                    sw_pdi0 = st.number_input("<0.2 (优)", min_value=0.00, max_value=1.00, value=0.08, step=0.01, format="%.2f", key="sw_pdi0")
                with col2:
                    sw_pdi1 = st.number_input("0.2-0.3 (良)", min_value=0.00, max_value=1.00, value=0.02, step=0.01, format="%.2f", key="sw_pdi1")
                with col3:
                    sw_pdi2 = st.number_input("0.3-0.4 (中)", min_value=0.00, max_value=1.00, value=0.00, step=0.01, format="%.2f", key="sw_pdi2")
                with col4:
                    sw_pdi3 = st.number_input(">0.4 (差)", min_value=0.00, max_value=1.00, value=0.00, step=0.01, format="%.2f", key="sw_pdi3")
                st.caption("**毒性分类权重**")
                col1, col2 = st.columns(2)
                with col1:
                    sw_toxic0 = st.number_input("无毒", min_value=0.00, max_value=1.00, value=0.20, step=0.05, format="%.2f", key="sw_toxic0")
                with col2:
                    sw_toxic1 = st.number_input("有毒", min_value=0.00, max_value=1.00, value=0.00, step=0.05, format="%.2f", key="sw_toxic1")
                scoring_weights = {
                    "biodist_weight": sw_biodist,
                    "delivery_weight": sw_delivery,
                    "size_weight": sw_size,
                    "ee_class_weights": [sw_ee0, sw_ee1, sw_ee2],
                    "pdi_class_weights": [sw_pdi0, sw_pdi1, sw_pdi2, sw_pdi3],
                    "toxic_class_weights": [sw_toxic0, sw_toxic1],
                }
            else:
                scoring_weights = None  # default: rank by target biodist only
        st.divider()
        # Launch button — disabled while offline / input missing
        optimize_button = st.button(
            "🚀 开始配方优选",
            type="primary",
            use_container_width=True,
            disabled=not api_online or not smiles_input.strip() or not selected_routes,
        )
    # ========== Main content area ==========
    # Persist results in session state so they survive Streamlit reruns.
    if "results" not in st.session_state:
        st.session_state["results"] = None
    if "results_df" not in st.session_state:
        st.session_state["results_df"] = None
    # Run the optimisation
    if optimize_button and smiles_input.strip():
        # Split comma-separated input into individual SMILES.
        smiles_list = [s.strip() for s in smiles_input.split(",") if s.strip()]
        if not smiles_list:
            st.error("❌ 请输入有效的 SMILES 字符串")
        else:
            is_multi_smiles = len(smiles_list) > 1
            all_results = []
            all_dfs = []
            errors = []
            # Progress indicators (one API call per SMILES)
            progress_bar = st.progress(0)
            status_text = st.empty()
            for idx, smiles in enumerate(smiles_list):
                status_text.text(f"🔄 正在优化 SMILES {idx + 1}/{len(smiles_list)}...")
                progress_bar.progress((idx) / len(smiles_list))
                try:
                    results = call_optimize_api(
                        smiles=smiles,
                        organ=selected_organ,
                        top_k=top_k,
                        num_seeds=num_seeds,
                        top_per_seed=top_per_seed,
                        step_sizes=step_sizes,
                        comp_ranges=comp_ranges,
                        routes=selected_routes,
                        scoring_weights=scoring_weights,
                    )
                    all_results.append({"smiles": smiles, "results": results})
                    # In multi-SMILES mode, tag rows with a (truncated) label
                    smiles_label = smiles[:30] + "..." if len(smiles) > 30 else smiles
                    df = format_results_dataframe(results, smiles_label if is_multi_smiles else None)
                    all_dfs.append(df)
                except httpx.HTTPStatusError as e:
                    try:
                        error_detail = e.response.json().get("detail", str(e))
                    except:
                        error_detail = str(e)
                    errors.append(f"SMILES {idx + 1}: {error_detail}")
                except httpx.RequestError as e:
                    errors.append(f"SMILES {idx + 1}: API 连接失败 - {e}")
                except Exception as e:
                    errors.append(f"SMILES {idx + 1}: {e}")
            progress_bar.progress(1.0)
            status_text.empty()
            progress_bar.empty()
            # Surface per-SMILES errors (a failed item doesn't abort the batch)
            for err in errors:
                st.error(f"{err}")
            # Persist whatever succeeded
            if all_results:
                st.session_state["results"] = all_results[0]["results"] if len(all_results) == 1 else all_results
                st.session_state["results_df"] = pd.concat(all_dfs, ignore_index=True) if all_dfs else None
                st.session_state["smiles_used"] = smiles_list
                st.session_state["organ_used"] = selected_organ
                st.session_state["is_multi_smiles"] = is_multi_smiles
                st.success(f"✅ 优化完成!成功处理 {len(all_results)}/{len(smiles_list)} 条 SMILES")
    # Render results (from session state, so this also runs after a rerun)
    if st.session_state["results"] is not None and st.session_state["results_df"] is not None:
        results = st.session_state["results"]
        df = st.session_state["results_df"]
        is_multi_smiles = st.session_state.get("is_multi_smiles", False)
        # Overview metrics
        if is_multi_smiles:
            # Multi-SMILES mode: `results` is a list of per-SMILES payloads
            col1, col2, col3 = st.columns(3)
            with col1:
                # Target organ taken from the first result
                first_result = results[0]["results"] if isinstance(results, list) else results
                target_organ = first_result["target_organ"]
                st.metric(
                    "目标器官",
                    ORGAN_LABELS.get(target_organ, target_organ).split(" ")[0],
                )
            with col2:
                st.metric(
                    "SMILES 数量",
                    len(results) if isinstance(results, list) else 1,
                )
            with col3:
                st.metric(
                    "总配方数",
                    len(df),
                )
        else:
            # Single-SMILES mode: `results` is one payload dict
            col1, col2, col3 = st.columns(3)
            with col1:
                st.metric(
                    "目标器官",
                    ORGAN_LABELS.get(results["target_organ"], results["target_organ"]).split(" ")[0],
                )
            with col2:
                best_score = results["formulations"][0]["target_biodist"]
                st.metric(
                    "最优分布",
                    f"{best_score*100:.2f}%",
                )
            with col3:
                st.metric(
                    "优选配方数",
                    len(results["formulations"]),
                )
        st.divider()
        # Results table
        st.subheader("📊 优选配方列表")
        # Export button row
        col_export, col_spacer = st.columns([1, 4])
        with col_export:
            smiles_used = st.session_state.get("smiles_used", "")
            if isinstance(smiles_used, list):
                smiles_used = ",".join(smiles_used)
            csv_content = create_export_csv(
                df,
                smiles_used,
                st.session_state.get("organ_used", ""),
            )
            # Resolve target organ for the download file name
            if is_multi_smiles:
                target_organ = results[0]["results"]["target_organ"] if isinstance(results, list) else results["target_organ"]
            else:
                target_organ = results["target_organ"]
            st.download_button(
                label="📥 导出 CSV",
                data=csv_content,
                file_name=f"lnp_optimization_{target_organ}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
                mime="text/csv",
            )
        # Render the table
        st.dataframe(
            df,
            use_container_width=True,
            hide_index=True,
            height=600,
        )
        # Best-formulation detail panel (currently disabled)
        # with st.expander("🔍 查看最优配方详情"):
        #     best = results["formulations"][0]
        #     col1, col2 = st.columns(2)
        #     with col1:
        #         st.markdown("**配方参数**")
        #         st.json({
        #             "阳离子脂质/mRNA 比例": best["cationic_lipid_to_mrna_ratio"],
        #             "阳离子脂质 (mol%)": best["cationic_lipid_mol_ratio"],
        #             "磷脂 (mol%)": best["phospholipid_mol_ratio"],
        #             "胆固醇 (mol%)": best["cholesterol_mol_ratio"],
        #             "PEG 脂质 (mol%)": best["peg_lipid_mol_ratio"],
        #             "辅助脂质": best["helper_lipid"],
        #             "给药途径": best["route"],
        #         })
        #     with col2:
        #         st.markdown("**各器官 Biodistribution 预测**")
        #         biodist_df = pd.DataFrame([
        #             {"器官": ORGAN_LABELS.get(k, k), "Biodistribution": f"{v:.4f}"}
        #             for k, v in best["all_biodist"].items()
        #         ])
        #         st.dataframe(biodist_df, hide_index=True, use_container_width=True)
    else:
        # Welcome message shown before the first run
        st.info("👈 请在左侧输入 SMILES 并选择目标器官,然后点击「开始配方优选」")
        # Usage instructions (currently disabled)
        # with st.expander("📖 使用说明"):
        #     st.markdown("""
        #     ### 如何使用
        #     1. **输入 SMILES**: 在左侧输入框中输入阳离子脂质的 SMILES 字符串
        #     2. **选择目标器官**: 选择您希望优化的器官靶向
        #     3. **点击优选**: 系统将自动搜索最优配方组合
        #     4. **查看结果**: 右侧将显示 Top-20 优选配方
        #     5. **导出数据**: 点击导出按钮将结果保存为 CSV 文件
        #     ### 优化参数
        #     系统会优化以下配方参数:
        #     - **阳离子脂质/mRNA 比例**: 0.05 - 0.30
        #     - **阳离子脂质 mol 比例**: 0.05 - 0.80
        #     - **磷脂 mol 比例**: 0.00 - 0.80
        #     - **胆固醇 mol 比例**: 0.00 - 0.80
        #     - **PEG 脂质 mol 比例**: 0.00 - 0.05
        #     - **辅助脂质**: DOPE / DSPC / DOTAP
        #     - **给药途径**: 静脉注射 / 肌肉注射
        #     ### 约束条件
        #     mol 比例之和 = 1 (阳离子脂质 + 磷脂 + 胆固醇 + PEG 脂质)
        #     """)
# Script entry point (run via `streamlit run <this file>`).
if __name__ == "__main__":
    main()

1016
app/optimize.py Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,165 @@
{
"activation": "ReLU",
"adding_bond_types": true,
"adding_h": false,
"aggregation": "mean",
"aggregation_norm": 100,
"atom_constraints": [],
"atom_descriptor_scaling": true,
"atom_descriptors": null,
"atom_descriptors_path": null,
"atom_descriptors_size": 0,
"atom_features_size": 0,
"atom_messages": false,
"atom_targets": [],
"batch_size": 50,
"bias": false,
"bias_solvent": false,
"bond_constraints": [],
"bond_descriptor_scaling": true,
"bond_descriptors": null,
"bond_descriptors_path": null,
"bond_descriptors_size": 0,
"bond_features_size": 0,
"bond_targets": [],
"cache_cutoff": 10000,
"checkpoint_dir": null,
"checkpoint_frzn": null,
"checkpoint_path": null,
"checkpoint_paths": null,
"class_balance": false,
"config_path": "../data/args_files/optimized_configs.json",
"constraints_path": null,
"crossval_index_dir": null,
"crossval_index_file": null,
"crossval_index_sets": null,
"cuda": true,
"data_path": "../data/crossval_splits/all_amine_split_for_paper/cv_0/train.csv",
"data_weights_path": "../data/crossval_splits/all_amine_split_for_paper/cv_0/train_weights.csv",
"dataset_type": "regression",
"depth": 4,
"depth_solvent": 3,
"device": {
"_string": "cuda",
"_type": "python_object (type = device)",
"_value": "gASVHwAAAAAAAACMBXRvcmNolIwGZGV2aWNllJOUjARjdWRhlIWUUpQu"
},
"dropout": 0.1,
"empty_cache": false,
"ensemble_size": 1,
"epochs": 50,
"evidential_regularization": 0,
"explicit_h": false,
"extra_metrics": [],
"features_generator": null,
"features_only": false,
"features_path": [
"../data/crossval_splits/all_amine_split_for_paper/cv_0/train_extra_x.csv"
],
"features_scaling": true,
"features_size": null,
"ffn_hidden_size": 600,
"ffn_num_layers": 3,
"final_lr": 0.0001,
"folds_file": null,
"freeze_first_only": false,
"frzn_ffn_layers": 0,
"gpu": null,
"grad_clip": null,
"hidden_size": 600,
"hidden_size_solvent": 300,
"ignore_columns": null,
"ignore_nan_metrics": false,
"init_lr": 0.0001,
"is_atom_bond_targets": false,
"keeping_atom_map": false,
"log_frequency": 10,
"loss_function": "mse",
"max_data_size": null,
"max_lr": 0.001,
"metric": "rmse",
"metrics": [
"rmse"
],
"minimize_score": true,
"mpn_shared": false,
"multiclass_num_classes": 3,
"no_adding_bond_types": false,
"no_atom_descriptor_scaling": false,
"no_bond_descriptor_scaling": false,
"no_cache_mol": false,
"no_cuda": false,
"no_features_scaling": false,
"no_shared_atom_bond_ffn": false,
"num_folds": 1,
"num_lrs": 1,
"num_tasks": 1,
"num_workers": 8,
"number_of_molecules": 1,
"overwrite_default_atom_features": false,
"overwrite_default_bond_features": false,
"phase_features_path": null,
"pytorch_seed": 0,
"quantile_loss_alpha": 0.1,
"quantiles": [],
"quiet": false,
"reaction": false,
"reaction_mode": "reac_diff",
"reaction_solvent": false,
"reproducibility": {
"command_line": "python main_script.py train all_amine_split_for_paper",
"git_has_uncommitted_changes": true,
"git_root": "/media/andersonxps/wd_4tb/evan/LNP_ML",
"git_url": "https://github.com/jswitten/LNP_ML/tree/167822980dc26ba65c5c14539c4ce12b81b0b8f3",
"time": "Tue Jul 30 10:15:25 2024"
},
"resume_experiment": false,
"save_dir": "../data/crossval_splits/all_amine_split_for_paper/cv_0",
"save_preds": false,
"save_smiles_splits": false,
"seed": 42,
"separate_test_atom_descriptors_path": null,
"separate_test_bond_descriptors_path": null,
"separate_test_constraints_path": null,
"separate_test_features_path": [
"../data/crossval_splits/all_amine_split_for_paper/cv_0/test_extra_x.csv"
],
"separate_test_path": "../data/crossval_splits/all_amine_split_for_paper/cv_0/test.csv",
"separate_test_phase_features_path": null,
"separate_val_atom_descriptors_path": null,
"separate_val_bond_descriptors_path": null,
"separate_val_constraints_path": null,
"separate_val_features_path": [
"../data/crossval_splits/all_amine_split_for_paper/cv_0/valid_extra_x.csv"
],
"separate_val_path": "../data/crossval_splits/all_amine_split_for_paper/cv_0/valid.csv",
"separate_val_phase_features_path": null,
"shared_atom_bond_ffn": true,
"show_individual_scores": false,
"smiles_columns": [
"smiles"
],
"spectra_activation": "exp",
"spectra_phase_mask_path": null,
"spectra_target_floor": 1e-08,
"split_key_molecule": 0,
"split_sizes": [
1.0,
0.0,
0.0
],
"split_type": "random",
"target_columns": null,
"target_weights": null,
"task_names": [
"quantified_delivery"
],
"test": false,
"test_fold_index": null,
"train_data_size": null,
"undirected": false,
"use_input_features": true,
"val_fold_index": null,
"warmup_epochs": 2.0,
"weights_ffn_num_layers": 2
}

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3b6dcfdd1b82a685b007ed06ad323defdb7486b24917c1ec515dbd2c5b927f08
size 6540631

View File

@ -0,0 +1,5 @@
{
"rmse": [
0.8880622451903801
]
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,2 @@
Task,Mean rmse,Standard deviation rmse,Fold 0 rmse
quantified_delivery,0.8880622451903801,0.0,0.8880622451903801
1 Task Mean rmse Standard deviation rmse Fold 0 rmse
2 quantified_delivery 0.8880622451903801 0.0 0.8880622451903801

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,165 @@
{
"activation": "ReLU",
"adding_bond_types": true,
"adding_h": false,
"aggregation": "mean",
"aggregation_norm": 100,
"atom_constraints": [],
"atom_descriptor_scaling": true,
"atom_descriptors": null,
"atom_descriptors_path": null,
"atom_descriptors_size": 0,
"atom_features_size": 0,
"atom_messages": false,
"atom_targets": [],
"batch_size": 50,
"bias": false,
"bias_solvent": false,
"bond_constraints": [],
"bond_descriptor_scaling": true,
"bond_descriptors": null,
"bond_descriptors_path": null,
"bond_descriptors_size": 0,
"bond_features_size": 0,
"bond_targets": [],
"cache_cutoff": 10000,
"checkpoint_dir": null,
"checkpoint_frzn": null,
"checkpoint_path": null,
"checkpoint_paths": null,
"class_balance": false,
"config_path": "../data/args_files/optimized_configs.json",
"constraints_path": null,
"crossval_index_dir": null,
"crossval_index_file": null,
"crossval_index_sets": null,
"cuda": true,
"data_path": "../data/crossval_splits/all_amine_split_for_paper/cv_1/train.csv",
"data_weights_path": "../data/crossval_splits/all_amine_split_for_paper/cv_1/train_weights.csv",
"dataset_type": "regression",
"depth": 4,
"depth_solvent": 3,
"device": {
"_string": "cuda",
"_type": "python_object (type = device)",
"_value": "gASVHwAAAAAAAACMBXRvcmNolIwGZGV2aWNllJOUjARjdWRhlIWUUpQu"
},
"dropout": 0.1,
"empty_cache": false,
"ensemble_size": 1,
"epochs": 50,
"evidential_regularization": 0,
"explicit_h": false,
"extra_metrics": [],
"features_generator": null,
"features_only": false,
"features_path": [
"../data/crossval_splits/all_amine_split_for_paper/cv_1/train_extra_x.csv"
],
"features_scaling": true,
"features_size": null,
"ffn_hidden_size": 600,
"ffn_num_layers": 3,
"final_lr": 0.0001,
"folds_file": null,
"freeze_first_only": false,
"frzn_ffn_layers": 0,
"gpu": null,
"grad_clip": null,
"hidden_size": 600,
"hidden_size_solvent": 300,
"ignore_columns": null,
"ignore_nan_metrics": false,
"init_lr": 0.0001,
"is_atom_bond_targets": false,
"keeping_atom_map": false,
"log_frequency": 10,
"loss_function": "mse",
"max_data_size": null,
"max_lr": 0.001,
"metric": "rmse",
"metrics": [
"rmse"
],
"minimize_score": true,
"mpn_shared": false,
"multiclass_num_classes": 3,
"no_adding_bond_types": false,
"no_atom_descriptor_scaling": false,
"no_bond_descriptor_scaling": false,
"no_cache_mol": false,
"no_cuda": false,
"no_features_scaling": false,
"no_shared_atom_bond_ffn": false,
"num_folds": 1,
"num_lrs": 1,
"num_tasks": 1,
"num_workers": 8,
"number_of_molecules": 1,
"overwrite_default_atom_features": false,
"overwrite_default_bond_features": false,
"phase_features_path": null,
"pytorch_seed": 0,
"quantile_loss_alpha": 0.1,
"quantiles": [],
"quiet": false,
"reaction": false,
"reaction_mode": "reac_diff",
"reaction_solvent": false,
"reproducibility": {
"command_line": "python main_script.py train all_amine_split_for_paper",
"git_has_uncommitted_changes": true,
"git_root": "/media/andersonxps/wd_4tb/evan/LNP_ML",
"git_url": "https://github.com/jswitten/LNP_ML/tree/167822980dc26ba65c5c14539c4ce12b81b0b8f3",
"time": "Tue Jul 30 10:21:40 2024"
},
"resume_experiment": false,
"save_dir": "../data/crossval_splits/all_amine_split_for_paper/cv_1",
"save_preds": false,
"save_smiles_splits": false,
"seed": 42,
"separate_test_atom_descriptors_path": null,
"separate_test_bond_descriptors_path": null,
"separate_test_constraints_path": null,
"separate_test_features_path": [
"../data/crossval_splits/all_amine_split_for_paper/cv_1/test_extra_x.csv"
],
"separate_test_path": "../data/crossval_splits/all_amine_split_for_paper/cv_1/test.csv",
"separate_test_phase_features_path": null,
"separate_val_atom_descriptors_path": null,
"separate_val_bond_descriptors_path": null,
"separate_val_constraints_path": null,
"separate_val_features_path": [
"../data/crossval_splits/all_amine_split_for_paper/cv_1/valid_extra_x.csv"
],
"separate_val_path": "../data/crossval_splits/all_amine_split_for_paper/cv_1/valid.csv",
"separate_val_phase_features_path": null,
"shared_atom_bond_ffn": true,
"show_individual_scores": false,
"smiles_columns": [
"smiles"
],
"spectra_activation": "exp",
"spectra_phase_mask_path": null,
"spectra_target_floor": 1e-08,
"split_key_molecule": 0,
"split_sizes": [
1.0,
0.0,
0.0
],
"split_type": "random",
"target_columns": null,
"target_weights": null,
"task_names": [
"quantified_delivery"
],
"test": false,
"test_fold_index": null,
"train_data_size": null,
"undirected": false,
"use_input_features": true,
"val_fold_index": null,
"warmup_epochs": 2.0,
"weights_ffn_num_layers": 2
}

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6518259648eb75f0717f93d800048f25eeb8dec9fca13d7f1c02235c2ef8bda8
size 6540631

View File

@ -0,0 +1,5 @@
{
"rmse": [
1.01673724295223
]
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,2 @@
Task,Mean rmse,Standard deviation rmse,Fold 0 rmse
quantified_delivery,1.01673724295223,0.0,1.01673724295223
1 Task Mean rmse Standard deviation rmse Fold 0 rmse
2 quantified_delivery 1.01673724295223 0.0 1.01673724295223

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,165 @@
{
"activation": "ReLU",
"adding_bond_types": true,
"adding_h": false,
"aggregation": "mean",
"aggregation_norm": 100,
"atom_constraints": [],
"atom_descriptor_scaling": true,
"atom_descriptors": null,
"atom_descriptors_path": null,
"atom_descriptors_size": 0,
"atom_features_size": 0,
"atom_messages": false,
"atom_targets": [],
"batch_size": 50,
"bias": false,
"bias_solvent": false,
"bond_constraints": [],
"bond_descriptor_scaling": true,
"bond_descriptors": null,
"bond_descriptors_path": null,
"bond_descriptors_size": 0,
"bond_features_size": 0,
"bond_targets": [],
"cache_cutoff": 10000,
"checkpoint_dir": null,
"checkpoint_frzn": null,
"checkpoint_path": null,
"checkpoint_paths": null,
"class_balance": false,
"config_path": "../data/args_files/optimized_configs.json",
"constraints_path": null,
"crossval_index_dir": null,
"crossval_index_file": null,
"crossval_index_sets": null,
"cuda": true,
"data_path": "../data/crossval_splits/all_amine_split_for_paper/cv_2/train.csv",
"data_weights_path": "../data/crossval_splits/all_amine_split_for_paper/cv_2/train_weights.csv",
"dataset_type": "regression",
"depth": 4,
"depth_solvent": 3,
"device": {
"_string": "cuda",
"_type": "python_object (type = device)",
"_value": "gASVHwAAAAAAAACMBXRvcmNolIwGZGV2aWNllJOUjARjdWRhlIWUUpQu"
},
"dropout": 0.1,
"empty_cache": false,
"ensemble_size": 1,
"epochs": 50,
"evidential_regularization": 0,
"explicit_h": false,
"extra_metrics": [],
"features_generator": null,
"features_only": false,
"features_path": [
"../data/crossval_splits/all_amine_split_for_paper/cv_2/train_extra_x.csv"
],
"features_scaling": true,
"features_size": null,
"ffn_hidden_size": 600,
"ffn_num_layers": 3,
"final_lr": 0.0001,
"folds_file": null,
"freeze_first_only": false,
"frzn_ffn_layers": 0,
"gpu": null,
"grad_clip": null,
"hidden_size": 600,
"hidden_size_solvent": 300,
"ignore_columns": null,
"ignore_nan_metrics": false,
"init_lr": 0.0001,
"is_atom_bond_targets": false,
"keeping_atom_map": false,
"log_frequency": 10,
"loss_function": "mse",
"max_data_size": null,
"max_lr": 0.001,
"metric": "rmse",
"metrics": [
"rmse"
],
"minimize_score": true,
"mpn_shared": false,
"multiclass_num_classes": 3,
"no_adding_bond_types": false,
"no_atom_descriptor_scaling": false,
"no_bond_descriptor_scaling": false,
"no_cache_mol": false,
"no_cuda": false,
"no_features_scaling": false,
"no_shared_atom_bond_ffn": false,
"num_folds": 1,
"num_lrs": 1,
"num_tasks": 1,
"num_workers": 8,
"number_of_molecules": 1,
"overwrite_default_atom_features": false,
"overwrite_default_bond_features": false,
"phase_features_path": null,
"pytorch_seed": 0,
"quantile_loss_alpha": 0.1,
"quantiles": [],
"quiet": false,
"reaction": false,
"reaction_mode": "reac_diff",
"reaction_solvent": false,
"reproducibility": {
"command_line": "python main_script.py train all_amine_split_for_paper",
"git_has_uncommitted_changes": true,
"git_root": "/media/andersonxps/wd_4tb/evan/LNP_ML",
"git_url": "https://github.com/jswitten/LNP_ML/tree/167822980dc26ba65c5c14539c4ce12b81b0b8f3",
"time": "Tue Jul 30 10:28:04 2024"
},
"resume_experiment": false,
"save_dir": "../data/crossval_splits/all_amine_split_for_paper/cv_2",
"save_preds": false,
"save_smiles_splits": false,
"seed": 42,
"separate_test_atom_descriptors_path": null,
"separate_test_bond_descriptors_path": null,
"separate_test_constraints_path": null,
"separate_test_features_path": [
"../data/crossval_splits/all_amine_split_for_paper/cv_2/test_extra_x.csv"
],
"separate_test_path": "../data/crossval_splits/all_amine_split_for_paper/cv_2/test.csv",
"separate_test_phase_features_path": null,
"separate_val_atom_descriptors_path": null,
"separate_val_bond_descriptors_path": null,
"separate_val_constraints_path": null,
"separate_val_features_path": [
"../data/crossval_splits/all_amine_split_for_paper/cv_2/valid_extra_x.csv"
],
"separate_val_path": "../data/crossval_splits/all_amine_split_for_paper/cv_2/valid.csv",
"separate_val_phase_features_path": null,
"shared_atom_bond_ffn": true,
"show_individual_scores": false,
"smiles_columns": [
"smiles"
],
"spectra_activation": "exp",
"spectra_phase_mask_path": null,
"spectra_target_floor": 1e-08,
"split_key_molecule": 0,
"split_sizes": [
1.0,
0.0,
0.0
],
"split_type": "random",
"target_columns": null,
"target_weights": null,
"task_names": [
"quantified_delivery"
],
"test": false,
"test_fold_index": null,
"train_data_size": null,
"undirected": false,
"use_input_features": true,
"val_fold_index": null,
"warmup_epochs": 2.0,
"weights_ffn_num_layers": 2
}

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5df89f8f7b97d314e05171db5891a0fb2199f3b591dbc9d44680e77811520acb
size 6540631

View File

@ -0,0 +1,5 @@
{
"rmse": [
0.8788072588544181
]
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,2 @@
Task,Mean rmse,Standard deviation rmse,Fold 0 rmse
quantified_delivery,0.8788072588544181,0.0,0.8788072588544181
1 Task Mean rmse Standard deviation rmse Fold 0 rmse
2 quantified_delivery 0.8788072588544181 0.0 0.8788072588544181

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,165 @@
{
"activation": "ReLU",
"adding_bond_types": true,
"adding_h": false,
"aggregation": "mean",
"aggregation_norm": 100,
"atom_constraints": [],
"atom_descriptor_scaling": true,
"atom_descriptors": null,
"atom_descriptors_path": null,
"atom_descriptors_size": 0,
"atom_features_size": 0,
"atom_messages": false,
"atom_targets": [],
"batch_size": 50,
"bias": false,
"bias_solvent": false,
"bond_constraints": [],
"bond_descriptor_scaling": true,
"bond_descriptors": null,
"bond_descriptors_path": null,
"bond_descriptors_size": 0,
"bond_features_size": 0,
"bond_targets": [],
"cache_cutoff": 10000,
"checkpoint_dir": null,
"checkpoint_frzn": null,
"checkpoint_path": null,
"checkpoint_paths": null,
"class_balance": false,
"config_path": "../data/args_files/optimized_configs.json",
"constraints_path": null,
"crossval_index_dir": null,
"crossval_index_file": null,
"crossval_index_sets": null,
"cuda": true,
"data_path": "../data/crossval_splits/all_amine_split_for_paper/cv_3/train.csv",
"data_weights_path": "../data/crossval_splits/all_amine_split_for_paper/cv_3/train_weights.csv",
"dataset_type": "regression",
"depth": 4,
"depth_solvent": 3,
"device": {
"_string": "cuda",
"_type": "python_object (type = device)",
"_value": "gASVHwAAAAAAAACMBXRvcmNolIwGZGV2aWNllJOUjARjdWRhlIWUUpQu"
},
"dropout": 0.1,
"empty_cache": false,
"ensemble_size": 1,
"epochs": 50,
"evidential_regularization": 0,
"explicit_h": false,
"extra_metrics": [],
"features_generator": null,
"features_only": false,
"features_path": [
"../data/crossval_splits/all_amine_split_for_paper/cv_3/train_extra_x.csv"
],
"features_scaling": true,
"features_size": null,
"ffn_hidden_size": 600,
"ffn_num_layers": 3,
"final_lr": 0.0001,
"folds_file": null,
"freeze_first_only": false,
"frzn_ffn_layers": 0,
"gpu": null,
"grad_clip": null,
"hidden_size": 600,
"hidden_size_solvent": 300,
"ignore_columns": null,
"ignore_nan_metrics": false,
"init_lr": 0.0001,
"is_atom_bond_targets": false,
"keeping_atom_map": false,
"log_frequency": 10,
"loss_function": "mse",
"max_data_size": null,
"max_lr": 0.001,
"metric": "rmse",
"metrics": [
"rmse"
],
"minimize_score": true,
"mpn_shared": false,
"multiclass_num_classes": 3,
"no_adding_bond_types": false,
"no_atom_descriptor_scaling": false,
"no_bond_descriptor_scaling": false,
"no_cache_mol": false,
"no_cuda": false,
"no_features_scaling": false,
"no_shared_atom_bond_ffn": false,
"num_folds": 1,
"num_lrs": 1,
"num_tasks": 1,
"num_workers": 8,
"number_of_molecules": 1,
"overwrite_default_atom_features": false,
"overwrite_default_bond_features": false,
"phase_features_path": null,
"pytorch_seed": 0,
"quantile_loss_alpha": 0.1,
"quantiles": [],
"quiet": false,
"reaction": false,
"reaction_mode": "reac_diff",
"reaction_solvent": false,
"reproducibility": {
"command_line": "python main_script.py train all_amine_split_for_paper",
"git_has_uncommitted_changes": true,
"git_root": "/media/andersonxps/wd_4tb/evan/LNP_ML",
"git_url": "https://github.com/jswitten/LNP_ML/tree/167822980dc26ba65c5c14539c4ce12b81b0b8f3",
"time": "Tue Jul 30 10:34:31 2024"
},
"resume_experiment": false,
"save_dir": "../data/crossval_splits/all_amine_split_for_paper/cv_3",
"save_preds": false,
"save_smiles_splits": false,
"seed": 42,
"separate_test_atom_descriptors_path": null,
"separate_test_bond_descriptors_path": null,
"separate_test_constraints_path": null,
"separate_test_features_path": [
"../data/crossval_splits/all_amine_split_for_paper/cv_3/test_extra_x.csv"
],
"separate_test_path": "../data/crossval_splits/all_amine_split_for_paper/cv_3/test.csv",
"separate_test_phase_features_path": null,
"separate_val_atom_descriptors_path": null,
"separate_val_bond_descriptors_path": null,
"separate_val_constraints_path": null,
"separate_val_features_path": [
"../data/crossval_splits/all_amine_split_for_paper/cv_3/valid_extra_x.csv"
],
"separate_val_path": "../data/crossval_splits/all_amine_split_for_paper/cv_3/valid.csv",
"separate_val_phase_features_path": null,
"shared_atom_bond_ffn": true,
"show_individual_scores": false,
"smiles_columns": [
"smiles"
],
"spectra_activation": "exp",
"spectra_phase_mask_path": null,
"spectra_target_floor": 1e-08,
"split_key_molecule": 0,
"split_sizes": [
1.0,
0.0,
0.0
],
"split_type": "random",
"target_columns": null,
"target_weights": null,
"task_names": [
"quantified_delivery"
],
"test": false,
"test_fold_index": null,
"train_data_size": null,
"undirected": false,
"use_input_features": true,
"val_fold_index": null,
"warmup_epochs": 2.0,
"weights_ffn_num_layers": 2
}

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e5ae4ef9d7980963742eb04c54cdf5fe3a16db9d95c22db273ad072413b651b3
size 6540631

View File

@ -0,0 +1,5 @@
{
"rmse": [
0.9245934905333985
]
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,2 @@
Task,Mean rmse,Standard deviation rmse,Fold 0 rmse
quantified_delivery,0.9245934905333985,0.0,0.9245934905333985
1 Task Mean rmse Standard deviation rmse Fold 0 rmse
2 quantified_delivery 0.9245934905333985 0.0 0.9245934905333985

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,165 @@
{
"activation": "ReLU",
"adding_bond_types": true,
"adding_h": false,
"aggregation": "mean",
"aggregation_norm": 100,
"atom_constraints": [],
"atom_descriptor_scaling": true,
"atom_descriptors": null,
"atom_descriptors_path": null,
"atom_descriptors_size": 0,
"atom_features_size": 0,
"atom_messages": false,
"atom_targets": [],
"batch_size": 50,
"bias": false,
"bias_solvent": false,
"bond_constraints": [],
"bond_descriptor_scaling": true,
"bond_descriptors": null,
"bond_descriptors_path": null,
"bond_descriptors_size": 0,
"bond_features_size": 0,
"bond_targets": [],
"cache_cutoff": 10000,
"checkpoint_dir": null,
"checkpoint_frzn": null,
"checkpoint_path": null,
"checkpoint_paths": null,
"class_balance": false,
"config_path": "../data/args_files/optimized_configs.json",
"constraints_path": null,
"crossval_index_dir": null,
"crossval_index_file": null,
"crossval_index_sets": null,
"cuda": true,
"data_path": "../data/crossval_splits/all_amine_split_for_paper/cv_4/train.csv",
"data_weights_path": "../data/crossval_splits/all_amine_split_for_paper/cv_4/train_weights.csv",
"dataset_type": "regression",
"depth": 4,
"depth_solvent": 3,
"device": {
"_string": "cuda",
"_type": "python_object (type = device)",
"_value": "gASVHwAAAAAAAACMBXRvcmNolIwGZGV2aWNllJOUjARjdWRhlIWUUpQu"
},
"dropout": 0.1,
"empty_cache": false,
"ensemble_size": 1,
"epochs": 50,
"evidential_regularization": 0,
"explicit_h": false,
"extra_metrics": [],
"features_generator": null,
"features_only": false,
"features_path": [
"../data/crossval_splits/all_amine_split_for_paper/cv_4/train_extra_x.csv"
],
"features_scaling": true,
"features_size": null,
"ffn_hidden_size": 600,
"ffn_num_layers": 3,
"final_lr": 0.0001,
"folds_file": null,
"freeze_first_only": false,
"frzn_ffn_layers": 0,
"gpu": null,
"grad_clip": null,
"hidden_size": 600,
"hidden_size_solvent": 300,
"ignore_columns": null,
"ignore_nan_metrics": false,
"init_lr": 0.0001,
"is_atom_bond_targets": false,
"keeping_atom_map": false,
"log_frequency": 10,
"loss_function": "mse",
"max_data_size": null,
"max_lr": 0.001,
"metric": "rmse",
"metrics": [
"rmse"
],
"minimize_score": true,
"mpn_shared": false,
"multiclass_num_classes": 3,
"no_adding_bond_types": false,
"no_atom_descriptor_scaling": false,
"no_bond_descriptor_scaling": false,
"no_cache_mol": false,
"no_cuda": false,
"no_features_scaling": false,
"no_shared_atom_bond_ffn": false,
"num_folds": 1,
"num_lrs": 1,
"num_tasks": 1,
"num_workers": 8,
"number_of_molecules": 1,
"overwrite_default_atom_features": false,
"overwrite_default_bond_features": false,
"phase_features_path": null,
"pytorch_seed": 0,
"quantile_loss_alpha": 0.1,
"quantiles": [],
"quiet": false,
"reaction": false,
"reaction_mode": "reac_diff",
"reaction_solvent": false,
"reproducibility": {
"command_line": "python main_script.py train all_amine_split_for_paper",
"git_has_uncommitted_changes": true,
"git_root": "/media/andersonxps/wd_4tb/evan/LNP_ML",
"git_url": "https://github.com/jswitten/LNP_ML/tree/167822980dc26ba65c5c14539c4ce12b81b0b8f3",
"time": "Tue Jul 30 10:40:44 2024"
},
"resume_experiment": false,
"save_dir": "../data/crossval_splits/all_amine_split_for_paper/cv_4",
"save_preds": false,
"save_smiles_splits": false,
"seed": 42,
"separate_test_atom_descriptors_path": null,
"separate_test_bond_descriptors_path": null,
"separate_test_constraints_path": null,
"separate_test_features_path": [
"../data/crossval_splits/all_amine_split_for_paper/cv_4/test_extra_x.csv"
],
"separate_test_path": "../data/crossval_splits/all_amine_split_for_paper/cv_4/test.csv",
"separate_test_phase_features_path": null,
"separate_val_atom_descriptors_path": null,
"separate_val_bond_descriptors_path": null,
"separate_val_constraints_path": null,
"separate_val_features_path": [
"../data/crossval_splits/all_amine_split_for_paper/cv_4/valid_extra_x.csv"
],
"separate_val_path": "../data/crossval_splits/all_amine_split_for_paper/cv_4/valid.csv",
"separate_val_phase_features_path": null,
"shared_atom_bond_ffn": true,
"show_individual_scores": false,
"smiles_columns": [
"smiles"
],
"spectra_activation": "exp",
"spectra_phase_mask_path": null,
"spectra_target_floor": 1e-08,
"split_key_molecule": 0,
"split_sizes": [
1.0,
0.0,
0.0
],
"split_type": "random",
"target_columns": null,
"target_weights": null,
"task_names": [
"quantified_delivery"
],
"test": false,
"test_fold_index": null,
"train_data_size": null,
"undirected": false,
"use_input_features": true,
"val_fold_index": null,
"warmup_epochs": 2.0,
"weights_ffn_num_layers": 2
}

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c69ab5f85cbca9dac0f871b1d6841a199cf40eeba2f46173eff9654a8f59bc8d
size 6540631

View File

@ -0,0 +1,5 @@
{
"rmse": [
0.8268900471469541
]
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,2 @@
Task,Mean rmse,Standard deviation rmse,Fold 0 rmse
quantified_delivery,0.8268900471469541,0.0,0.8268900471469541
1 Task Mean rmse Standard deviation rmse Fold 0 rmse
2 quantified_delivery 0.8268900471469541 0.0 0.8268900471469541

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

Some files were not shown because too many files have changed in this diff Show More