mirror of
https://github.com/RYDE-WORK/lnp_ml.git
synced 2026-03-21 09:36:32 +08:00
Merge branch 'main' of github.com:RYDE-WORK/lnp_ml into main
This commit is contained in:
commit
002aaebd6f
@ -1,6 +1,7 @@
|
|||||||
"""Benchmark 脚本:在 baseline 论文公开的 CV 划分上评估模型(仅 delivery 任务)"""
|
"""Benchmark 脚本:在 baseline 论文公开的 CV 划分上评估模型(仅 delivery 任务)"""
|
||||||
|
|
||||||
import json
|
import json
|
||||||
|
import math
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Dict, List, Optional
|
from typing import Dict, List, Optional
|
||||||
|
|
||||||
@ -9,6 +10,7 @@ import pandas as pd
|
|||||||
import torch
|
import torch
|
||||||
import torch.nn as nn
|
import torch.nn as nn
|
||||||
from torch.utils.data import DataLoader
|
from torch.utils.data import DataLoader
|
||||||
|
from torch.optim.lr_scheduler import LambdaLR, CosineAnnealingLR, SequentialLR
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
from sklearn.metrics import mean_squared_error, r2_score
|
from sklearn.metrics import mean_squared_error, r2_score
|
||||||
@ -158,6 +160,7 @@ def train_fold(
|
|||||||
weight_decay: float = 1e-5,
|
weight_decay: float = 1e-5,
|
||||||
epochs: int = 50,
|
epochs: int = 50,
|
||||||
patience: int = 10,
|
patience: int = 10,
|
||||||
|
warmup_epochs: int = 3,
|
||||||
config: Optional[Dict] = None,
|
config: Optional[Dict] = None,
|
||||||
) -> Dict:
|
) -> Dict:
|
||||||
"""训练单个 fold"""
|
"""训练单个 fold"""
|
||||||
@ -166,9 +169,19 @@ def train_fold(
|
|||||||
logger.info(f"{'='*60}")
|
logger.info(f"{'='*60}")
|
||||||
|
|
||||||
optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
|
optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
|
||||||
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
|
|
||||||
optimizer, mode="min", factor=0.5, patience=5
|
warmup_scheduler = LambdaLR(
|
||||||
|
optimizer, lr_lambda=lambda epoch: (epoch + 1) / warmup_epochs
|
||||||
)
|
)
|
||||||
|
cosine_scheduler = CosineAnnealingLR(
|
||||||
|
optimizer, T_max=epochs - warmup_epochs
|
||||||
|
)
|
||||||
|
scheduler = SequentialLR(
|
||||||
|
optimizer,
|
||||||
|
schedulers=[warmup_scheduler, cosine_scheduler],
|
||||||
|
milestones=[warmup_epochs],
|
||||||
|
)
|
||||||
|
|
||||||
early_stopping = EarlyStopping(patience=patience)
|
early_stopping = EarlyStopping(patience=patience)
|
||||||
|
|
||||||
best_val_loss = float("inf")
|
best_val_loss = float("inf")
|
||||||
@ -198,7 +211,7 @@ def train_fold(
|
|||||||
"lr": current_lr,
|
"lr": current_lr,
|
||||||
})
|
})
|
||||||
|
|
||||||
scheduler.step(val_metrics["loss"])
|
scheduler.step()
|
||||||
|
|
||||||
if val_metrics["loss"] < best_val_loss:
|
if val_metrics["loss"] < best_val_loss:
|
||||||
best_val_loss = val_metrics["loss"]
|
best_val_loss = val_metrics["loss"]
|
||||||
@ -284,11 +297,11 @@ def main(
|
|||||||
data_dir: Path = PROCESSED_DATA_DIR / "benchmark",
|
data_dir: Path = PROCESSED_DATA_DIR / "benchmark",
|
||||||
output_dir: Path = MODELS_DIR / "benchmark",
|
output_dir: Path = MODELS_DIR / "benchmark",
|
||||||
# 模型参数
|
# 模型参数
|
||||||
d_model: int = 128,
|
d_model: int = 256,
|
||||||
num_heads: int = 4,
|
num_heads: int = 8,
|
||||||
n_attn_layers: int = 2,
|
n_attn_layers: int = 4,
|
||||||
fusion_strategy: str = "attention",
|
fusion_strategy: str = "attention",
|
||||||
head_hidden_dim: int = 64,
|
head_hidden_dim: int = 128,
|
||||||
dropout: float = 0.1,
|
dropout: float = 0.1,
|
||||||
# MPNN 参数
|
# MPNN 参数
|
||||||
use_mpnn: bool = False,
|
use_mpnn: bool = False,
|
||||||
@ -416,7 +429,9 @@ def main(
|
|||||||
model.rdkit_encoder._cache = rdkit_cache
|
model.rdkit_encoder._cache = rdkit_cache
|
||||||
logger.info(f"Reusing RDKit cache with {len(rdkit_cache)} entries")
|
logger.info(f"Reusing RDKit cache with {len(rdkit_cache)} entries")
|
||||||
|
|
||||||
logger.info(f"Model parameters: {sum(p.numel() for p in model.parameters()):,}")
|
n_params_total = sum(p.numel() for p in model.parameters())
|
||||||
|
n_params_trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
|
||||||
|
logger.info(f"Model parameters: {n_params_total:,} total, {n_params_trainable:,} trainable")
|
||||||
|
|
||||||
# 训练
|
# 训练
|
||||||
result = train_fold(
|
result = train_fold(
|
||||||
|
|||||||
@ -303,8 +303,9 @@ def main(
|
|||||||
dropout=dropout,
|
dropout=dropout,
|
||||||
)
|
)
|
||||||
|
|
||||||
n_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
|
n_params_total = sum(p.numel() for p in model.parameters())
|
||||||
logger.info(f"Model parameters: {n_params:,}")
|
n_params_trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
|
||||||
|
logger.info(f"Model parameters: {n_params_total:,} total, {n_params_trainable:,} trainable")
|
||||||
|
|
||||||
# 预热 RDKit 缓存(避免训练时阻塞)
|
# 预热 RDKit 缓存(避免训练时阻塞)
|
||||||
all_smiles = train_df["smiles"].tolist() + val_df["smiles"].tolist()
|
all_smiles = train_df["smiles"].tolist() + val_df["smiles"].tolist()
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user