{
  "d_model": 256,
  "num_heads": 8,
  "n_attn_layers": 4,
  "fusion_strategy": "attention",
  "head_hidden_dim": 128,
  "dropout": 0.1,
  "use_mpnn": false,
  "mpnn_ensemble_paths": null,
  "lr": 0.0001,
  "weight_decay": 1e-05,
  "batch_size": 64,
  "epochs": 50,
  "patience": 10
}