diff --git a/lnp_ml/modeling/benchmark.py b/lnp_ml/modeling/benchmark.py
index 2c6be78..3ad7ac9 100644
--- a/lnp_ml/modeling/benchmark.py
+++ b/lnp_ml/modeling/benchmark.py
@@ -416,7 +416,9 @@ def main(
         model.rdkit_encoder._cache = rdkit_cache
         logger.info(f"Reusing RDKit cache with {len(rdkit_cache)} entries")
 
-    logger.info(f"Model parameters: {sum(p.numel() for p in model.parameters()):,}")
+    n_params_total = sum(p.numel() for p in model.parameters())
+    n_params_trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
+    logger.info(f"Model parameters: {n_params_total:,} total, {n_params_trainable:,} trainable")
 
     # 训练
     result = train_fold(
diff --git a/lnp_ml/modeling/pretrain.py b/lnp_ml/modeling/pretrain.py
index abb702d..b7d9d66 100644
--- a/lnp_ml/modeling/pretrain.py
+++ b/lnp_ml/modeling/pretrain.py
@@ -303,8 +303,9 @@ def main(
         dropout=dropout,
     )
 
-    n_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
-    logger.info(f"Model parameters: {n_params:,}")
+    n_params_total = sum(p.numel() for p in model.parameters())
+    n_params_trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
+    logger.info(f"Model parameters: {n_params_total:,} total, {n_params_trainable:,} trainable")
 
     # 预热 RDKit 缓存(避免训练时阻塞)
     all_smiles = train_df["smiles"].tolist() + val_df["smiles"].tolist()