Merge branch 'main' of github.com:RYDE-WORK/lnp_ml into main

This commit is contained in:
RYDE-WORK 2026-02-28 17:53:28 +08:00
commit 002aaebd6f
2 changed files with 26 additions and 10 deletions

View File

@@ -1,6 +1,7 @@
"""Benchmark 脚本:在 baseline 论文公开的 CV 划分上评估模型(仅 delivery 任务)""" """Benchmark 脚本:在 baseline 论文公开的 CV 划分上评估模型(仅 delivery 任务)"""
import json import json
import math
from pathlib import Path from pathlib import Path
from typing import Dict, List, Optional from typing import Dict, List, Optional
@@ -9,6 +10,7 @@ import pandas as pd
import torch import torch
import torch.nn as nn import torch.nn as nn
from torch.utils.data import DataLoader from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import LambdaLR, CosineAnnealingLR, SequentialLR
from loguru import logger from loguru import logger
from tqdm import tqdm from tqdm import tqdm
from sklearn.metrics import mean_squared_error, r2_score from sklearn.metrics import mean_squared_error, r2_score
@@ -158,6 +160,7 @@ def train_fold(
weight_decay: float = 1e-5, weight_decay: float = 1e-5,
epochs: int = 50, epochs: int = 50,
patience: int = 10, patience: int = 10,
warmup_epochs: int = 3,
config: Optional[Dict] = None, config: Optional[Dict] = None,
) -> Dict: ) -> Dict:
"""训练单个 fold""" """训练单个 fold"""
@@ -166,9 +169,19 @@ def train_fold(
logger.info(f"{'='*60}") logger.info(f"{'='*60}")
optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay) optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
optimizer, mode="min", factor=0.5, patience=5 warmup_scheduler = LambdaLR(
optimizer, lr_lambda=lambda epoch: (epoch + 1) / warmup_epochs
) )
cosine_scheduler = CosineAnnealingLR(
optimizer, T_max=epochs - warmup_epochs
)
scheduler = SequentialLR(
optimizer,
schedulers=[warmup_scheduler, cosine_scheduler],
milestones=[warmup_epochs],
)
early_stopping = EarlyStopping(patience=patience) early_stopping = EarlyStopping(patience=patience)
best_val_loss = float("inf") best_val_loss = float("inf")
@@ -198,7 +211,7 @@ def train_fold(
"lr": current_lr, "lr": current_lr,
}) })
scheduler.step(val_metrics["loss"]) scheduler.step()
if val_metrics["loss"] < best_val_loss: if val_metrics["loss"] < best_val_loss:
best_val_loss = val_metrics["loss"] best_val_loss = val_metrics["loss"]
@@ -284,11 +297,11 @@ def main(
data_dir: Path = PROCESSED_DATA_DIR / "benchmark", data_dir: Path = PROCESSED_DATA_DIR / "benchmark",
output_dir: Path = MODELS_DIR / "benchmark", output_dir: Path = MODELS_DIR / "benchmark",
# 模型参数 # 模型参数
d_model: int = 128, d_model: int = 256,
num_heads: int = 4, num_heads: int = 8,
n_attn_layers: int = 2, n_attn_layers: int = 4,
fusion_strategy: str = "attention", fusion_strategy: str = "attention",
head_hidden_dim: int = 64, head_hidden_dim: int = 128,
dropout: float = 0.1, dropout: float = 0.1,
# MPNN 参数 # MPNN 参数
use_mpnn: bool = False, use_mpnn: bool = False,
@@ -416,7 +429,9 @@ def main(
model.rdkit_encoder._cache = rdkit_cache model.rdkit_encoder._cache = rdkit_cache
logger.info(f"Reusing RDKit cache with {len(rdkit_cache)} entries") logger.info(f"Reusing RDKit cache with {len(rdkit_cache)} entries")
logger.info(f"Model parameters: {sum(p.numel() for p in model.parameters()):,}") n_params_total = sum(p.numel() for p in model.parameters())
n_params_trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
logger.info(f"Model parameters: {n_params_total:,} total, {n_params_trainable:,} trainable")
# 训练 # 训练
result = train_fold( result = train_fold(

View File

@@ -303,8 +303,9 @@ def main(
dropout=dropout, dropout=dropout,
) )
n_params = sum(p.numel() for p in model.parameters() if p.requires_grad) n_params_total = sum(p.numel() for p in model.parameters())
logger.info(f"Model parameters: {n_params:,}") n_params_trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
logger.info(f"Model parameters: {n_params_total:,} total, {n_params_trainable:,} trainable")
# 预热 RDKit 缓存(避免训练时阻塞) # 预热 RDKit 缓存(避免训练时阻塞)
all_smiles = train_df["smiles"].tolist() + val_df["smiles"].tolist() all_smiles = train_df["smiles"].tolist() + val_df["smiles"].tolist()