diff --git a/lnp_ml/modeling/final_train_optuna_cv.py b/lnp_ml/modeling/final_train_optuna_cv.py
index b1782bb..6b5de89 100644
--- a/lnp_ml/modeling/final_train_optuna_cv.py
+++ b/lnp_ml/modeling/final_train_optuna_cv.py
@@ -260,18 +260,30 @@ def run_optuna_cv(
     n_samples = len(full_dataset)
     indices = np.arange(n_samples)
     
+    # Fixed architecture params: taken from pretrain_config (built-in defaults if missing) so pretrained weights load fully
+    _cfg = pretrain_config or {}
+    fixed_d_model = _cfg.get("d_model", 256)
+    fixed_num_heads = _cfg.get("num_heads", 8)
+    fixed_n_attn_layers = _cfg.get("n_attn_layers", 4)
+    fixed_fusion_strategy = _cfg.get("fusion_strategy", "attention")
+    fixed_head_hidden_dim = _cfg.get("head_hidden_dim", 128)
+    logger.info(
+        f"Fixed architecture params: d_model={fixed_d_model}, num_heads={fixed_num_heads}, "
+        f"n_attn_layers={fixed_n_attn_layers}, fusion={fixed_fusion_strategy}, "
+        f"head_hidden_dim={fixed_head_hidden_dim}"
+    )
+    
     def objective(trial: optuna.Trial) -> float:
-        # 采样超参数
-        d_model = trial.suggest_categorical("d_model", [128, 256, 512])
-        num_heads = trial.suggest_categorical("num_heads", [4, 8])
-        n_attn_layers = trial.suggest_int("n_attn_layers", 2, 6)
-        fusion_strategy = trial.suggest_categorical(
-            "fusion_strategy", ["attention", "avg", "max"]
-        )
-        head_hidden_dim = trial.suggest_categorical("head_hidden_dim", [64, 128, 256])
-        dropout = trial.suggest_float("dropout", 0.05, 0.3)
-        lr = trial.suggest_float("lr", 1e-5, 1e-3, log=True)
-        weight_decay = trial.suggest_float("weight_decay", 1e-6, 1e-4, log=True)
+        d_model = fixed_d_model
+        num_heads = fixed_num_heads
+        n_attn_layers = fixed_n_attn_layers
+        fusion_strategy = fixed_fusion_strategy
+        head_hidden_dim = fixed_head_hidden_dim
+        
+        # Search only the training hyperparameters (dropout, lr, weight_decay)
+        dropout = trial.suggest_float("dropout", 0.1, 0.5)
+        lr = trial.suggest_float("lr", 1e-5, 3e-4, log=True)
+        weight_decay = trial.suggest_float("weight_decay", 1e-5, 1e-3, log=True)
         
         # 3-fold CV
         cv = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=seed)
@@ -351,7 +363,14 @@ def run_optuna_cv(
     
     study.optimize(objective, n_trials=n_trials, show_progress_bar=True)
     
-    best_params = study.best_trial.params
+    best_params = dict(study.best_trial.params)
+    best_params.update({
+        "d_model": fixed_d_model,
+        "num_heads": fixed_num_heads,
+        "n_attn_layers": fixed_n_attn_layers,
+        "fusion_strategy": fixed_fusion_strategy,
+        "head_hidden_dim": fixed_head_hidden_dim,
+    })
     epoch_mean = study.best_trial.user_attrs.get("epoch_mean", epochs_per_trial)
     
     logger.info(f"Best trial: {study.best_trial.number}")
diff --git a/lnp_ml/modeling/nested_cv_optuna.py b/lnp_ml/modeling/nested_cv_optuna.py
index f37265c..fa0f38b 100644
--- a/lnp_ml/modeling/nested_cv_optuna.py
+++ b/lnp_ml/modeling/nested_cv_optuna.py
@@ -373,18 +373,30 @@ def run_inner_optuna(
     
     inner_strata = strata[inner_train_indices]
     
+    # Fixed architecture params: taken from pretrain_config (built-in defaults if missing) so pretrained weights load fully
+    _cfg = pretrain_config or {}
+    fixed_d_model = _cfg.get("d_model", 256)
+    fixed_num_heads = _cfg.get("num_heads", 8)
+    fixed_n_attn_layers = _cfg.get("n_attn_layers", 4)
+    fixed_fusion_strategy = _cfg.get("fusion_strategy", "attention")
+    fixed_head_hidden_dim = _cfg.get("head_hidden_dim", 128)
+    logger.info(
+        f"Fixed architecture params: d_model={fixed_d_model}, num_heads={fixed_num_heads}, "
+        f"n_attn_layers={fixed_n_attn_layers}, fusion={fixed_fusion_strategy}, "
+        f"head_hidden_dim={fixed_head_hidden_dim}"
+    )
+    
     def objective(trial: optuna.Trial) -> float:
-        # 采样超参数
-        d_model = trial.suggest_categorical("d_model", [128, 256, 512])
-        num_heads = trial.suggest_categorical("num_heads", [4, 8])
-        n_attn_layers = trial.suggest_int("n_attn_layers", 2, 6)
-        fusion_strategy = trial.suggest_categorical(
-            "fusion_strategy", ["attention", "avg", "max"]
-        )
-        head_hidden_dim = trial.suggest_categorical("head_hidden_dim", [64, 128, 256])
-        dropout = trial.suggest_float("dropout", 0.05, 0.3)
-        lr = trial.suggest_float("lr", 1e-5, 1e-3, log=True)
-        weight_decay = trial.suggest_float("weight_decay", 1e-6, 1e-4, log=True)
+        d_model = fixed_d_model
+        num_heads = fixed_num_heads
+        n_attn_layers = fixed_n_attn_layers
+        fusion_strategy = fixed_fusion_strategy
+        head_hidden_dim = fixed_head_hidden_dim
+        
+        # Search only the training hyperparameters (dropout, lr, weight_decay)
+        dropout = trial.suggest_float("dropout", 0.1, 0.5)
+        lr = trial.suggest_float("lr", 1e-5, 3e-4, log=True)
+        weight_decay = trial.suggest_float("weight_decay", 1e-5, 1e-3, log=True)
         
         # 内层 3-fold CV
         inner_cv = StratifiedKFold(
@@ -471,7 +483,14 @@ def run_inner_optuna(
     
     study.optimize(objective, n_trials=n_trials, show_progress_bar=True)
     
-    best_params = study.best_trial.params
+    best_params = dict(study.best_trial.params)
+    best_params.update({
+        "d_model": fixed_d_model,
+        "num_heads": fixed_num_heads,
+        "n_attn_layers": fixed_n_attn_layers,
+        "fusion_strategy": fixed_fusion_strategy,
+        "head_hidden_dim": fixed_head_hidden_dim,
+    })
     epoch_mean = study.best_trial.user_attrs.get("epoch_mean", epochs_per_trial)
     
     logger.info(f"Best trial: {study.best_trial.number}")