{ "d_model": 256, "num_heads": 8, "n_attn_layers": 4, "fusion_strategy": "attention", "head_hidden_dim": 128, "dropout": 0.1, "use_mpnn": false, "mpnn_ensemble_paths": null, "lr": 0.0001, "weight_decay": 1e-05, "batch_size": 64, "epochs": 50, "patience": 10 }