增加LiON的评估指标

This commit is contained in:
RYDE-WORK 2026-01-21 16:20:10 +08:00
parent 6e4f85c5a8
commit 6773929ea2
92 changed files with 202534 additions and 0 deletions

View File

@ -0,0 +1,165 @@
{
"activation": "ReLU",
"adding_bond_types": true,
"adding_h": false,
"aggregation": "mean",
"aggregation_norm": 100,
"atom_constraints": [],
"atom_descriptor_scaling": true,
"atom_descriptors": null,
"atom_descriptors_path": null,
"atom_descriptors_size": 0,
"atom_features_size": 0,
"atom_messages": false,
"atom_targets": [],
"batch_size": 50,
"bias": false,
"bias_solvent": false,
"bond_constraints": [],
"bond_descriptor_scaling": true,
"bond_descriptors": null,
"bond_descriptors_path": null,
"bond_descriptors_size": 0,
"bond_features_size": 0,
"bond_targets": [],
"cache_cutoff": 10000,
"checkpoint_dir": null,
"checkpoint_frzn": null,
"checkpoint_path": null,
"checkpoint_paths": null,
"class_balance": false,
"config_path": "../data/args_files/optimized_configs.json",
"constraints_path": null,
"crossval_index_dir": null,
"crossval_index_file": null,
"crossval_index_sets": null,
"cuda": true,
"data_path": "../data/crossval_splits/all_amine_split_for_paper/cv_0/train.csv",
"data_weights_path": "../data/crossval_splits/all_amine_split_for_paper/cv_0/train_weights.csv",
"dataset_type": "regression",
"depth": 4,
"depth_solvent": 3,
"device": {
"_string": "cuda",
"_type": "python_object (type = device)",
"_value": "gASVHwAAAAAAAACMBXRvcmNolIwGZGV2aWNllJOUjARjdWRhlIWUUpQu"
},
"dropout": 0.1,
"empty_cache": false,
"ensemble_size": 1,
"epochs": 50,
"evidential_regularization": 0,
"explicit_h": false,
"extra_metrics": [],
"features_generator": null,
"features_only": false,
"features_path": [
"../data/crossval_splits/all_amine_split_for_paper/cv_0/train_extra_x.csv"
],
"features_scaling": true,
"features_size": null,
"ffn_hidden_size": 600,
"ffn_num_layers": 3,
"final_lr": 0.0001,
"folds_file": null,
"freeze_first_only": false,
"frzn_ffn_layers": 0,
"gpu": null,
"grad_clip": null,
"hidden_size": 600,
"hidden_size_solvent": 300,
"ignore_columns": null,
"ignore_nan_metrics": false,
"init_lr": 0.0001,
"is_atom_bond_targets": false,
"keeping_atom_map": false,
"log_frequency": 10,
"loss_function": "mse",
"max_data_size": null,
"max_lr": 0.001,
"metric": "rmse",
"metrics": [
"rmse"
],
"minimize_score": true,
"mpn_shared": false,
"multiclass_num_classes": 3,
"no_adding_bond_types": false,
"no_atom_descriptor_scaling": false,
"no_bond_descriptor_scaling": false,
"no_cache_mol": false,
"no_cuda": false,
"no_features_scaling": false,
"no_shared_atom_bond_ffn": false,
"num_folds": 1,
"num_lrs": 1,
"num_tasks": 1,
"num_workers": 8,
"number_of_molecules": 1,
"overwrite_default_atom_features": false,
"overwrite_default_bond_features": false,
"phase_features_path": null,
"pytorch_seed": 0,
"quantile_loss_alpha": 0.1,
"quantiles": [],
"quiet": false,
"reaction": false,
"reaction_mode": "reac_diff",
"reaction_solvent": false,
"reproducibility": {
"command_line": "python main_script.py train all_amine_split_for_paper",
"git_has_uncommitted_changes": true,
"git_root": "/media/andersonxps/wd_4tb/evan/LNP_ML",
"git_url": "https://github.com/jswitten/LNP_ML/tree/167822980dc26ba65c5c14539c4ce12b81b0b8f3",
"time": "Tue Jul 30 10:15:25 2024"
},
"resume_experiment": false,
"save_dir": "../data/crossval_splits/all_amine_split_for_paper/cv_0",
"save_preds": false,
"save_smiles_splits": false,
"seed": 42,
"separate_test_atom_descriptors_path": null,
"separate_test_bond_descriptors_path": null,
"separate_test_constraints_path": null,
"separate_test_features_path": [
"../data/crossval_splits/all_amine_split_for_paper/cv_0/test_extra_x.csv"
],
"separate_test_path": "../data/crossval_splits/all_amine_split_for_paper/cv_0/test.csv",
"separate_test_phase_features_path": null,
"separate_val_atom_descriptors_path": null,
"separate_val_bond_descriptors_path": null,
"separate_val_constraints_path": null,
"separate_val_features_path": [
"../data/crossval_splits/all_amine_split_for_paper/cv_0/valid_extra_x.csv"
],
"separate_val_path": "../data/crossval_splits/all_amine_split_for_paper/cv_0/valid.csv",
"separate_val_phase_features_path": null,
"shared_atom_bond_ffn": true,
"show_individual_scores": false,
"smiles_columns": [
"smiles"
],
"spectra_activation": "exp",
"spectra_phase_mask_path": null,
"spectra_target_floor": 1e-08,
"split_key_molecule": 0,
"split_sizes": [
1.0,
0.0,
0.0
],
"split_type": "random",
"target_columns": null,
"target_weights": null,
"task_names": [
"quantified_delivery"
],
"test": false,
"test_fold_index": null,
"train_data_size": null,
"undirected": false,
"use_input_features": true,
"val_fold_index": null,
"warmup_epochs": 2.0,
"weights_ffn_num_layers": 2
}

Binary file not shown.

View File

@ -0,0 +1,5 @@
{
"rmse": [
0.8880622451903801
]
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,2 @@
Task,Mean rmse,Standard deviation rmse,Fold 0 rmse
quantified_delivery,0.8880622451903801,0.0,0.8880622451903801
1 Task Mean rmse Standard deviation rmse Fold 0 rmse
2 quantified_delivery 0.8880622451903801 0.0 0.8880622451903801

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,165 @@
{
"activation": "ReLU",
"adding_bond_types": true,
"adding_h": false,
"aggregation": "mean",
"aggregation_norm": 100,
"atom_constraints": [],
"atom_descriptor_scaling": true,
"atom_descriptors": null,
"atom_descriptors_path": null,
"atom_descriptors_size": 0,
"atom_features_size": 0,
"atom_messages": false,
"atom_targets": [],
"batch_size": 50,
"bias": false,
"bias_solvent": false,
"bond_constraints": [],
"bond_descriptor_scaling": true,
"bond_descriptors": null,
"bond_descriptors_path": null,
"bond_descriptors_size": 0,
"bond_features_size": 0,
"bond_targets": [],
"cache_cutoff": 10000,
"checkpoint_dir": null,
"checkpoint_frzn": null,
"checkpoint_path": null,
"checkpoint_paths": null,
"class_balance": false,
"config_path": "../data/args_files/optimized_configs.json",
"constraints_path": null,
"crossval_index_dir": null,
"crossval_index_file": null,
"crossval_index_sets": null,
"cuda": true,
"data_path": "../data/crossval_splits/all_amine_split_for_paper/cv_1/train.csv",
"data_weights_path": "../data/crossval_splits/all_amine_split_for_paper/cv_1/train_weights.csv",
"dataset_type": "regression",
"depth": 4,
"depth_solvent": 3,
"device": {
"_string": "cuda",
"_type": "python_object (type = device)",
"_value": "gASVHwAAAAAAAACMBXRvcmNolIwGZGV2aWNllJOUjARjdWRhlIWUUpQu"
},
"dropout": 0.1,
"empty_cache": false,
"ensemble_size": 1,
"epochs": 50,
"evidential_regularization": 0,
"explicit_h": false,
"extra_metrics": [],
"features_generator": null,
"features_only": false,
"features_path": [
"../data/crossval_splits/all_amine_split_for_paper/cv_1/train_extra_x.csv"
],
"features_scaling": true,
"features_size": null,
"ffn_hidden_size": 600,
"ffn_num_layers": 3,
"final_lr": 0.0001,
"folds_file": null,
"freeze_first_only": false,
"frzn_ffn_layers": 0,
"gpu": null,
"grad_clip": null,
"hidden_size": 600,
"hidden_size_solvent": 300,
"ignore_columns": null,
"ignore_nan_metrics": false,
"init_lr": 0.0001,
"is_atom_bond_targets": false,
"keeping_atom_map": false,
"log_frequency": 10,
"loss_function": "mse",
"max_data_size": null,
"max_lr": 0.001,
"metric": "rmse",
"metrics": [
"rmse"
],
"minimize_score": true,
"mpn_shared": false,
"multiclass_num_classes": 3,
"no_adding_bond_types": false,
"no_atom_descriptor_scaling": false,
"no_bond_descriptor_scaling": false,
"no_cache_mol": false,
"no_cuda": false,
"no_features_scaling": false,
"no_shared_atom_bond_ffn": false,
"num_folds": 1,
"num_lrs": 1,
"num_tasks": 1,
"num_workers": 8,
"number_of_molecules": 1,
"overwrite_default_atom_features": false,
"overwrite_default_bond_features": false,
"phase_features_path": null,
"pytorch_seed": 0,
"quantile_loss_alpha": 0.1,
"quantiles": [],
"quiet": false,
"reaction": false,
"reaction_mode": "reac_diff",
"reaction_solvent": false,
"reproducibility": {
"command_line": "python main_script.py train all_amine_split_for_paper",
"git_has_uncommitted_changes": true,
"git_root": "/media/andersonxps/wd_4tb/evan/LNP_ML",
"git_url": "https://github.com/jswitten/LNP_ML/tree/167822980dc26ba65c5c14539c4ce12b81b0b8f3",
"time": "Tue Jul 30 10:21:40 2024"
},
"resume_experiment": false,
"save_dir": "../data/crossval_splits/all_amine_split_for_paper/cv_1",
"save_preds": false,
"save_smiles_splits": false,
"seed": 42,
"separate_test_atom_descriptors_path": null,
"separate_test_bond_descriptors_path": null,
"separate_test_constraints_path": null,
"separate_test_features_path": [
"../data/crossval_splits/all_amine_split_for_paper/cv_1/test_extra_x.csv"
],
"separate_test_path": "../data/crossval_splits/all_amine_split_for_paper/cv_1/test.csv",
"separate_test_phase_features_path": null,
"separate_val_atom_descriptors_path": null,
"separate_val_bond_descriptors_path": null,
"separate_val_constraints_path": null,
"separate_val_features_path": [
"../data/crossval_splits/all_amine_split_for_paper/cv_1/valid_extra_x.csv"
],
"separate_val_path": "../data/crossval_splits/all_amine_split_for_paper/cv_1/valid.csv",
"separate_val_phase_features_path": null,
"shared_atom_bond_ffn": true,
"show_individual_scores": false,
"smiles_columns": [
"smiles"
],
"spectra_activation": "exp",
"spectra_phase_mask_path": null,
"spectra_target_floor": 1e-08,
"split_key_molecule": 0,
"split_sizes": [
1.0,
0.0,
0.0
],
"split_type": "random",
"target_columns": null,
"target_weights": null,
"task_names": [
"quantified_delivery"
],
"test": false,
"test_fold_index": null,
"train_data_size": null,
"undirected": false,
"use_input_features": true,
"val_fold_index": null,
"warmup_epochs": 2.0,
"weights_ffn_num_layers": 2
}

Binary file not shown.

View File

@ -0,0 +1,5 @@
{
"rmse": [
1.01673724295223
]
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,2 @@
Task,Mean rmse,Standard deviation rmse,Fold 0 rmse
quantified_delivery,1.01673724295223,0.0,1.01673724295223
1 Task Mean rmse Standard deviation rmse Fold 0 rmse
2 quantified_delivery 1.01673724295223 0.0 1.01673724295223

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,165 @@
{
"activation": "ReLU",
"adding_bond_types": true,
"adding_h": false,
"aggregation": "mean",
"aggregation_norm": 100,
"atom_constraints": [],
"atom_descriptor_scaling": true,
"atom_descriptors": null,
"atom_descriptors_path": null,
"atom_descriptors_size": 0,
"atom_features_size": 0,
"atom_messages": false,
"atom_targets": [],
"batch_size": 50,
"bias": false,
"bias_solvent": false,
"bond_constraints": [],
"bond_descriptor_scaling": true,
"bond_descriptors": null,
"bond_descriptors_path": null,
"bond_descriptors_size": 0,
"bond_features_size": 0,
"bond_targets": [],
"cache_cutoff": 10000,
"checkpoint_dir": null,
"checkpoint_frzn": null,
"checkpoint_path": null,
"checkpoint_paths": null,
"class_balance": false,
"config_path": "../data/args_files/optimized_configs.json",
"constraints_path": null,
"crossval_index_dir": null,
"crossval_index_file": null,
"crossval_index_sets": null,
"cuda": true,
"data_path": "../data/crossval_splits/all_amine_split_for_paper/cv_2/train.csv",
"data_weights_path": "../data/crossval_splits/all_amine_split_for_paper/cv_2/train_weights.csv",
"dataset_type": "regression",
"depth": 4,
"depth_solvent": 3,
"device": {
"_string": "cuda",
"_type": "python_object (type = device)",
"_value": "gASVHwAAAAAAAACMBXRvcmNolIwGZGV2aWNllJOUjARjdWRhlIWUUpQu"
},
"dropout": 0.1,
"empty_cache": false,
"ensemble_size": 1,
"epochs": 50,
"evidential_regularization": 0,
"explicit_h": false,
"extra_metrics": [],
"features_generator": null,
"features_only": false,
"features_path": [
"../data/crossval_splits/all_amine_split_for_paper/cv_2/train_extra_x.csv"
],
"features_scaling": true,
"features_size": null,
"ffn_hidden_size": 600,
"ffn_num_layers": 3,
"final_lr": 0.0001,
"folds_file": null,
"freeze_first_only": false,
"frzn_ffn_layers": 0,
"gpu": null,
"grad_clip": null,
"hidden_size": 600,
"hidden_size_solvent": 300,
"ignore_columns": null,
"ignore_nan_metrics": false,
"init_lr": 0.0001,
"is_atom_bond_targets": false,
"keeping_atom_map": false,
"log_frequency": 10,
"loss_function": "mse",
"max_data_size": null,
"max_lr": 0.001,
"metric": "rmse",
"metrics": [
"rmse"
],
"minimize_score": true,
"mpn_shared": false,
"multiclass_num_classes": 3,
"no_adding_bond_types": false,
"no_atom_descriptor_scaling": false,
"no_bond_descriptor_scaling": false,
"no_cache_mol": false,
"no_cuda": false,
"no_features_scaling": false,
"no_shared_atom_bond_ffn": false,
"num_folds": 1,
"num_lrs": 1,
"num_tasks": 1,
"num_workers": 8,
"number_of_molecules": 1,
"overwrite_default_atom_features": false,
"overwrite_default_bond_features": false,
"phase_features_path": null,
"pytorch_seed": 0,
"quantile_loss_alpha": 0.1,
"quantiles": [],
"quiet": false,
"reaction": false,
"reaction_mode": "reac_diff",
"reaction_solvent": false,
"reproducibility": {
"command_line": "python main_script.py train all_amine_split_for_paper",
"git_has_uncommitted_changes": true,
"git_root": "/media/andersonxps/wd_4tb/evan/LNP_ML",
"git_url": "https://github.com/jswitten/LNP_ML/tree/167822980dc26ba65c5c14539c4ce12b81b0b8f3",
"time": "Tue Jul 30 10:28:04 2024"
},
"resume_experiment": false,
"save_dir": "../data/crossval_splits/all_amine_split_for_paper/cv_2",
"save_preds": false,
"save_smiles_splits": false,
"seed": 42,
"separate_test_atom_descriptors_path": null,
"separate_test_bond_descriptors_path": null,
"separate_test_constraints_path": null,
"separate_test_features_path": [
"../data/crossval_splits/all_amine_split_for_paper/cv_2/test_extra_x.csv"
],
"separate_test_path": "../data/crossval_splits/all_amine_split_for_paper/cv_2/test.csv",
"separate_test_phase_features_path": null,
"separate_val_atom_descriptors_path": null,
"separate_val_bond_descriptors_path": null,
"separate_val_constraints_path": null,
"separate_val_features_path": [
"../data/crossval_splits/all_amine_split_for_paper/cv_2/valid_extra_x.csv"
],
"separate_val_path": "../data/crossval_splits/all_amine_split_for_paper/cv_2/valid.csv",
"separate_val_phase_features_path": null,
"shared_atom_bond_ffn": true,
"show_individual_scores": false,
"smiles_columns": [
"smiles"
],
"spectra_activation": "exp",
"spectra_phase_mask_path": null,
"spectra_target_floor": 1e-08,
"split_key_molecule": 0,
"split_sizes": [
1.0,
0.0,
0.0
],
"split_type": "random",
"target_columns": null,
"target_weights": null,
"task_names": [
"quantified_delivery"
],
"test": false,
"test_fold_index": null,
"train_data_size": null,
"undirected": false,
"use_input_features": true,
"val_fold_index": null,
"warmup_epochs": 2.0,
"weights_ffn_num_layers": 2
}

Binary file not shown.

View File

@ -0,0 +1,5 @@
{
"rmse": [
0.8788072588544181
]
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,2 @@
Task,Mean rmse,Standard deviation rmse,Fold 0 rmse
quantified_delivery,0.8788072588544181,0.0,0.8788072588544181
1 Task Mean rmse Standard deviation rmse Fold 0 rmse
2 quantified_delivery 0.8788072588544181 0.0 0.8788072588544181

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,165 @@
{
"activation": "ReLU",
"adding_bond_types": true,
"adding_h": false,
"aggregation": "mean",
"aggregation_norm": 100,
"atom_constraints": [],
"atom_descriptor_scaling": true,
"atom_descriptors": null,
"atom_descriptors_path": null,
"atom_descriptors_size": 0,
"atom_features_size": 0,
"atom_messages": false,
"atom_targets": [],
"batch_size": 50,
"bias": false,
"bias_solvent": false,
"bond_constraints": [],
"bond_descriptor_scaling": true,
"bond_descriptors": null,
"bond_descriptors_path": null,
"bond_descriptors_size": 0,
"bond_features_size": 0,
"bond_targets": [],
"cache_cutoff": 10000,
"checkpoint_dir": null,
"checkpoint_frzn": null,
"checkpoint_path": null,
"checkpoint_paths": null,
"class_balance": false,
"config_path": "../data/args_files/optimized_configs.json",
"constraints_path": null,
"crossval_index_dir": null,
"crossval_index_file": null,
"crossval_index_sets": null,
"cuda": true,
"data_path": "../data/crossval_splits/all_amine_split_for_paper/cv_3/train.csv",
"data_weights_path": "../data/crossval_splits/all_amine_split_for_paper/cv_3/train_weights.csv",
"dataset_type": "regression",
"depth": 4,
"depth_solvent": 3,
"device": {
"_string": "cuda",
"_type": "python_object (type = device)",
"_value": "gASVHwAAAAAAAACMBXRvcmNolIwGZGV2aWNllJOUjARjdWRhlIWUUpQu"
},
"dropout": 0.1,
"empty_cache": false,
"ensemble_size": 1,
"epochs": 50,
"evidential_regularization": 0,
"explicit_h": false,
"extra_metrics": [],
"features_generator": null,
"features_only": false,
"features_path": [
"../data/crossval_splits/all_amine_split_for_paper/cv_3/train_extra_x.csv"
],
"features_scaling": true,
"features_size": null,
"ffn_hidden_size": 600,
"ffn_num_layers": 3,
"final_lr": 0.0001,
"folds_file": null,
"freeze_first_only": false,
"frzn_ffn_layers": 0,
"gpu": null,
"grad_clip": null,
"hidden_size": 600,
"hidden_size_solvent": 300,
"ignore_columns": null,
"ignore_nan_metrics": false,
"init_lr": 0.0001,
"is_atom_bond_targets": false,
"keeping_atom_map": false,
"log_frequency": 10,
"loss_function": "mse",
"max_data_size": null,
"max_lr": 0.001,
"metric": "rmse",
"metrics": [
"rmse"
],
"minimize_score": true,
"mpn_shared": false,
"multiclass_num_classes": 3,
"no_adding_bond_types": false,
"no_atom_descriptor_scaling": false,
"no_bond_descriptor_scaling": false,
"no_cache_mol": false,
"no_cuda": false,
"no_features_scaling": false,
"no_shared_atom_bond_ffn": false,
"num_folds": 1,
"num_lrs": 1,
"num_tasks": 1,
"num_workers": 8,
"number_of_molecules": 1,
"overwrite_default_atom_features": false,
"overwrite_default_bond_features": false,
"phase_features_path": null,
"pytorch_seed": 0,
"quantile_loss_alpha": 0.1,
"quantiles": [],
"quiet": false,
"reaction": false,
"reaction_mode": "reac_diff",
"reaction_solvent": false,
"reproducibility": {
"command_line": "python main_script.py train all_amine_split_for_paper",
"git_has_uncommitted_changes": true,
"git_root": "/media/andersonxps/wd_4tb/evan/LNP_ML",
"git_url": "https://github.com/jswitten/LNP_ML/tree/167822980dc26ba65c5c14539c4ce12b81b0b8f3",
"time": "Tue Jul 30 10:34:31 2024"
},
"resume_experiment": false,
"save_dir": "../data/crossval_splits/all_amine_split_for_paper/cv_3",
"save_preds": false,
"save_smiles_splits": false,
"seed": 42,
"separate_test_atom_descriptors_path": null,
"separate_test_bond_descriptors_path": null,
"separate_test_constraints_path": null,
"separate_test_features_path": [
"../data/crossval_splits/all_amine_split_for_paper/cv_3/test_extra_x.csv"
],
"separate_test_path": "../data/crossval_splits/all_amine_split_for_paper/cv_3/test.csv",
"separate_test_phase_features_path": null,
"separate_val_atom_descriptors_path": null,
"separate_val_bond_descriptors_path": null,
"separate_val_constraints_path": null,
"separate_val_features_path": [
"../data/crossval_splits/all_amine_split_for_paper/cv_3/valid_extra_x.csv"
],
"separate_val_path": "../data/crossval_splits/all_amine_split_for_paper/cv_3/valid.csv",
"separate_val_phase_features_path": null,
"shared_atom_bond_ffn": true,
"show_individual_scores": false,
"smiles_columns": [
"smiles"
],
"spectra_activation": "exp",
"spectra_phase_mask_path": null,
"spectra_target_floor": 1e-08,
"split_key_molecule": 0,
"split_sizes": [
1.0,
0.0,
0.0
],
"split_type": "random",
"target_columns": null,
"target_weights": null,
"task_names": [
"quantified_delivery"
],
"test": false,
"test_fold_index": null,
"train_data_size": null,
"undirected": false,
"use_input_features": true,
"val_fold_index": null,
"warmup_epochs": 2.0,
"weights_ffn_num_layers": 2
}

Binary file not shown.

View File

@ -0,0 +1,5 @@
{
"rmse": [
0.9245934905333985
]
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,2 @@
Task,Mean rmse,Standard deviation rmse,Fold 0 rmse
quantified_delivery,0.9245934905333985,0.0,0.9245934905333985
1 Task Mean rmse Standard deviation rmse Fold 0 rmse
2 quantified_delivery 0.9245934905333985 0.0 0.9245934905333985

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,165 @@
{
"activation": "ReLU",
"adding_bond_types": true,
"adding_h": false,
"aggregation": "mean",
"aggregation_norm": 100,
"atom_constraints": [],
"atom_descriptor_scaling": true,
"atom_descriptors": null,
"atom_descriptors_path": null,
"atom_descriptors_size": 0,
"atom_features_size": 0,
"atom_messages": false,
"atom_targets": [],
"batch_size": 50,
"bias": false,
"bias_solvent": false,
"bond_constraints": [],
"bond_descriptor_scaling": true,
"bond_descriptors": null,
"bond_descriptors_path": null,
"bond_descriptors_size": 0,
"bond_features_size": 0,
"bond_targets": [],
"cache_cutoff": 10000,
"checkpoint_dir": null,
"checkpoint_frzn": null,
"checkpoint_path": null,
"checkpoint_paths": null,
"class_balance": false,
"config_path": "../data/args_files/optimized_configs.json",
"constraints_path": null,
"crossval_index_dir": null,
"crossval_index_file": null,
"crossval_index_sets": null,
"cuda": true,
"data_path": "../data/crossval_splits/all_amine_split_for_paper/cv_4/train.csv",
"data_weights_path": "../data/crossval_splits/all_amine_split_for_paper/cv_4/train_weights.csv",
"dataset_type": "regression",
"depth": 4,
"depth_solvent": 3,
"device": {
"_string": "cuda",
"_type": "python_object (type = device)",
"_value": "gASVHwAAAAAAAACMBXRvcmNolIwGZGV2aWNllJOUjARjdWRhlIWUUpQu"
},
"dropout": 0.1,
"empty_cache": false,
"ensemble_size": 1,
"epochs": 50,
"evidential_regularization": 0,
"explicit_h": false,
"extra_metrics": [],
"features_generator": null,
"features_only": false,
"features_path": [
"../data/crossval_splits/all_amine_split_for_paper/cv_4/train_extra_x.csv"
],
"features_scaling": true,
"features_size": null,
"ffn_hidden_size": 600,
"ffn_num_layers": 3,
"final_lr": 0.0001,
"folds_file": null,
"freeze_first_only": false,
"frzn_ffn_layers": 0,
"gpu": null,
"grad_clip": null,
"hidden_size": 600,
"hidden_size_solvent": 300,
"ignore_columns": null,
"ignore_nan_metrics": false,
"init_lr": 0.0001,
"is_atom_bond_targets": false,
"keeping_atom_map": false,
"log_frequency": 10,
"loss_function": "mse",
"max_data_size": null,
"max_lr": 0.001,
"metric": "rmse",
"metrics": [
"rmse"
],
"minimize_score": true,
"mpn_shared": false,
"multiclass_num_classes": 3,
"no_adding_bond_types": false,
"no_atom_descriptor_scaling": false,
"no_bond_descriptor_scaling": false,
"no_cache_mol": false,
"no_cuda": false,
"no_features_scaling": false,
"no_shared_atom_bond_ffn": false,
"num_folds": 1,
"num_lrs": 1,
"num_tasks": 1,
"num_workers": 8,
"number_of_molecules": 1,
"overwrite_default_atom_features": false,
"overwrite_default_bond_features": false,
"phase_features_path": null,
"pytorch_seed": 0,
"quantile_loss_alpha": 0.1,
"quantiles": [],
"quiet": false,
"reaction": false,
"reaction_mode": "reac_diff",
"reaction_solvent": false,
"reproducibility": {
"command_line": "python main_script.py train all_amine_split_for_paper",
"git_has_uncommitted_changes": true,
"git_root": "/media/andersonxps/wd_4tb/evan/LNP_ML",
"git_url": "https://github.com/jswitten/LNP_ML/tree/167822980dc26ba65c5c14539c4ce12b81b0b8f3",
"time": "Tue Jul 30 10:40:44 2024"
},
"resume_experiment": false,
"save_dir": "../data/crossval_splits/all_amine_split_for_paper/cv_4",
"save_preds": false,
"save_smiles_splits": false,
"seed": 42,
"separate_test_atom_descriptors_path": null,
"separate_test_bond_descriptors_path": null,
"separate_test_constraints_path": null,
"separate_test_features_path": [
"../data/crossval_splits/all_amine_split_for_paper/cv_4/test_extra_x.csv"
],
"separate_test_path": "../data/crossval_splits/all_amine_split_for_paper/cv_4/test.csv",
"separate_test_phase_features_path": null,
"separate_val_atom_descriptors_path": null,
"separate_val_bond_descriptors_path": null,
"separate_val_constraints_path": null,
"separate_val_features_path": [
"../data/crossval_splits/all_amine_split_for_paper/cv_4/valid_extra_x.csv"
],
"separate_val_path": "../data/crossval_splits/all_amine_split_for_paper/cv_4/valid.csv",
"separate_val_phase_features_path": null,
"shared_atom_bond_ffn": true,
"show_individual_scores": false,
"smiles_columns": [
"smiles"
],
"spectra_activation": "exp",
"spectra_phase_mask_path": null,
"spectra_target_floor": 1e-08,
"split_key_molecule": 0,
"split_sizes": [
1.0,
0.0,
0.0
],
"split_type": "random",
"target_columns": null,
"target_weights": null,
"task_names": [
"quantified_delivery"
],
"test": false,
"test_fold_index": null,
"train_data_size": null,
"undirected": false,
"use_input_features": true,
"val_fold_index": null,
"warmup_epochs": 2.0,
"weights_ffn_num_layers": 2
}

Binary file not shown.

View File

@ -0,0 +1,5 @@
{
"rmse": [
0.8268900471469541
]
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,2 @@
Task,Mean rmse,Standard deviation rmse,Fold 0 rmse
quantified_delivery,0.8268900471469541,0.0,0.8268900471469541
1 Task Mean rmse Standard deviation rmse Fold 0 rmse
2 quantified_delivery 0.8268900471469541 0.0 0.8268900471469541

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,81 @@
{
"data_dir": "/Users/ryde/Documents/workspaces/\u8102\u8d28\u5206\u5b50\u836f\u7269\u9012\u9001\u6027\u80fd\u9884\u6d4b/\u6700\u65b0\u6574\u7406/lnp-ml/data/external/all_amine_split_for_LiON",
"y_col": "quantified_delivery",
"n_splits": 5,
"split_results": {
"cv_0": {
"n_samples": 2037,
"mse": 0.7886545513325788,
"rmse": 0.8880622451903801,
"mae": 0.6877543784155847,
"r2": 0.0932581633950823,
"correlation": 0.392311374929795
},
"cv_1": {
"n_samples": 1658,
"mse": 1.033754621206102,
"rmse": 1.01673724295223,
"mae": 0.7387616255484185,
"r2": 0.1125028601022715,
"correlation": 0.36625872345742133
},
"cv_2": {
"n_samples": 1615,
"mse": 0.7723021982152163,
"rmse": 0.8788072588544181,
"mae": 0.6676348056050336,
"r2": 0.21571146545185693,
"correlation": 0.48232889534779555
},
"cv_3": {
"n_samples": 1754,
"mse": 0.8548731227367337,
"rmse": 0.9245934905333985,
"mae": 0.6985704943044474,
"r2": 0.1788528564997307,
"correlation": 0.43034534464045127
},
"cv_4": {
"n_samples": 1520,
"mse": 0.6837471500706921,
"rmse": 0.8268900471469541,
"mae": 0.6290387454956646,
"r2": 0.27613207572098897,
"correlation": 0.5356790397266105
}
},
"overall": {
"n_samples": 8584,
"mse": 0.8278734457746315,
"rmse": 0.9098755111412943,
"mae": 0.6856342259095253,
"r2": 0.1721265542253685,
"correlation": 0.43182567556105245
},
"summary_stats": {
"rmse": {
"mean": 0.9070180569354761,
"std": 0.0631234750727322,
"min": 0.8268900471469541,
"max": 1.01673724295223
},
"r2": {
"mean": 0.17529148423398608,
"std": 0.06706289614045773,
"min": 0.0932581633950823,
"max": 0.27613207572098897
},
"mae": {
"mean": 0.6843520098738297,
"std": 0.036092020790716446,
"min": 0.6290387454956646,
"max": 0.7387616255484185
},
"correlation": {
"mean": 0.4413846756204148,
"std": 0.061223885190061875,
"min": 0.36625872345742133,
"max": 0.5356790397266105
}
}
}

View File

@ -0,0 +1,204 @@
"""评估外部数据 cross-validation 结果的脚本"""
import json
from pathlib import Path
from typing import Dict
import numpy as np
import pandas as pd
import typer
from loguru import logger
from sklearn.metrics import mean_squared_error, r2_score
from lnp_ml.config import EXTERNAL_DATA_DIR
# Typer application object; the command below registers itself on it via @app.command().
app = typer.Typer()
def evaluate_split(
    test_path: Path,
    preds_path: Path,
    y_col: str = "quantified_delivery",
) -> Dict[str, float]:
    """
    Evaluate the predictions of a single cross-validation split.

    Ground truth and predictions are paired row by row, so both CSV files are
    expected to list the samples in the same order. When both files carry a
    ``smiles`` column it is compared as a sanity check; a mismatch is only
    logged as a warning and the metrics are still computed.

    Args:
        test_path: Path to the ground-truth CSV; must contain ``y_col``.
        preds_path: Path to the predictions CSV; must contain ``y_col``.
        y_col: Name of the target column read from both files.

    Returns:
        Dict with the keys ``n_samples``, ``mse``, ``rmse``, ``mae``, ``r2``
        and ``correlation`` (off-diagonal entry of ``np.corrcoef``).

    Raises:
        ValueError: If ``y_col`` is missing from either file, or if the two
            files do not have the same number of rows.
    """
    test_df = pd.read_csv(test_path)
    preds_df = pd.read_csv(preds_path)

    # Both files must expose the target column (ground truth is checked first).
    for frame, source in ((test_df, test_path), (preds_df, preds_path)):
        if y_col not in frame.columns:
            raise ValueError(f"Column '{y_col}' not found in {source}")

    # Rows are aligned positionally, so differing lengths make the comparison
    # meaningless. Raise explicitly instead of using `assert`, which is
    # stripped when Python runs with -O.
    if len(test_df) != len(preds_df):
        raise ValueError(f"行数不匹配: test={len(test_df)}, preds={len(preds_df)}")

    # Optional sanity check: the molecules should appear in the same order.
    if "smiles" in test_df.columns and "smiles" in preds_df.columns:
        if not (test_df["smiles"].values == preds_df["smiles"].values).all():
            logger.warning("smiles 列顺序不一致,请检查数据")

    y_true = test_df[y_col].values
    y_pred = preds_df[y_col].values

    mse = float(mean_squared_error(y_true, y_pred))
    rmse = float(np.sqrt(mse))
    r2 = float(r2_score(y_true, y_pred))
    mae = float(np.mean(np.abs(y_true - y_pred)))
    # NOTE: np.corrcoef yields NaN when either series has zero variance.
    correlation = float(np.corrcoef(y_true, y_pred)[0, 1])

    return {
        "n_samples": len(y_true),
        "mse": mse,
        "rmse": rmse,
        "mae": mae,
        "r2": r2,
        "correlation": correlation,
    }
def _json_safe(value):
    """Recursively replace non-finite floats (NaN, +/-inf) with None so the value serialises as strict JSON."""
    if isinstance(value, dict):
        return {key: _json_safe(item) for key, item in value.items()}
    if isinstance(value, (list, tuple)):
        return [_json_safe(item) for item in value]
    if isinstance(value, float) and not np.isfinite(value):
        return None
    return value


@app.command()
def main(
    data_dir: Path = EXTERNAL_DATA_DIR / "all_amine_split_for_LiON",
    output_path: Path = EXTERNAL_DATA_DIR / "all_amine_split_for_LiON" / "evaluation_results.json",
    y_col: str = "quantified_delivery",
):
    """
    Evaluate the all_amine_split_for_LiON cross-validation results.

    For every cv_* directory under DATA_DIR that contains both test.csv and
    preds.csv, compute MSE, RMSE, MAE, R² and correlation; also compute the
    same metrics over all samples pooled together and the mean/std/min/max of
    the per-split metrics. The report is logged and written to OUTPUT_PATH as
    JSON. Exits with code 1 when nothing can be evaluated.
    """
    logger.info(f"Evaluating cross-validation results from {data_dir}")

    # Fail with a clear message instead of a traceback from iterdir().
    if not data_dir.is_dir():
        logger.error(f"Data directory not found: {data_dir}")
        raise typer.Exit(1)

    # Locate every cv_* split directory.
    cv_dirs = sorted([d for d in data_dir.iterdir() if d.is_dir() and d.name.startswith("cv_")])
    if not cv_dirs:
        logger.error(f"No cv_* directories found in {data_dir}")
        raise typer.Exit(1)
    logger.info(f"Found {len(cv_dirs)} splits: {[d.name for d in cv_dirs]}")

    # Evaluate each split and collect its targets for the pooled metrics.
    split_results = {}
    pooled_true = []
    pooled_pred = []
    for cv_dir in cv_dirs:
        split_name = cv_dir.name
        test_path = cv_dir / "test.csv"
        preds_path = cv_dir / "preds.csv"
        if not test_path.exists():
            logger.warning(f" {split_name}: test.csv not found, skipping")
            continue
        if not preds_path.exists():
            logger.warning(f" {split_name}: preds.csv not found, skipping")
            continue

        metrics = evaluate_split(test_path, preds_path, y_col)
        split_results[split_name] = metrics
        logger.info(
            f" {split_name}: n={metrics['n_samples']}, "
            f"RMSE={metrics['rmse']:.4f}, R²={metrics['r2']:.4f}"
        )

        # evaluate_split only returns scalar metrics, so the raw columns are
        # read again here to build the pooled arrays.
        pooled_true.extend(pd.read_csv(test_path)[y_col].tolist())
        pooled_pred.extend(pd.read_csv(preds_path)[y_col].tolist())

    # Every split may have been skipped; the metric functions below cannot
    # handle empty input, so stop with an explicit error instead.
    if not split_results:
        logger.error(f"No split in {data_dir} has both test.csv and preds.csv")
        raise typer.Exit(1)

    # Metrics over all samples pooled together.
    all_y_true = np.array(pooled_true)
    all_y_pred = np.array(pooled_pred)
    overall_mse = float(mean_squared_error(all_y_true, all_y_pred))
    overall_results = {
        "n_samples": len(all_y_true),
        "mse": overall_mse,
        "rmse": float(np.sqrt(overall_mse)),
        "mae": float(np.mean(np.abs(all_y_true - all_y_pred))),
        "r2": float(r2_score(all_y_true, all_y_pred)),
        "correlation": float(np.corrcoef(all_y_true, all_y_pred)[0, 1]),
    }

    # Mean / std / min / max of each metric across the evaluated splits
    # (np.std is called with its defaults).
    summary_stats = {}
    for metric in ["rmse", "r2", "mae", "correlation"]:
        values = [split[metric] for split in split_results.values()]
        summary_stats[metric] = {
            "mean": float(np.mean(values)),
            "std": float(np.std(values)),
            "min": float(np.min(values)),
            "max": float(np.max(values)),
        }

    results = {
        "data_dir": str(data_dir),
        "y_col": y_col,
        "n_splits": len(split_results),
        "split_results": split_results,
        "overall": overall_results,
        "summary_stats": summary_stats,
    }

    # Human-readable report.
    logger.info("\n" + "=" * 60)
    logger.info("CROSS-VALIDATION EVALUATION RESULTS")
    logger.info("=" * 60)
    logger.info("\n[Per-Split Results]")
    for split_name, metrics in sorted(split_results.items()):
        logger.info(
            f" {split_name}: RMSE={metrics['rmse']:.4f}, "
            f"R²={metrics['r2']:.4f}, "
            f"MAE={metrics['mae']:.4f}, "
            f"Corr={metrics['correlation']:.4f}"
        )
    logger.info(f"\n[Summary Statistics (across {len(split_results)} splits)]")
    for metric, stats in summary_stats.items():
        logger.info(
            f" {metric.upper():12s}: "
            f"mean={stats['mean']:.4f} ± {stats['std']:.4f} "
            f"(min={stats['min']:.4f}, max={stats['max']:.4f})"
        )
    logger.info(f"\n[Overall (all {overall_results['n_samples']} samples pooled)]")
    logger.info(f" RMSE: {overall_results['rmse']:.4f}")
    logger.info(f" R²: {overall_results['r2']:.4f}")
    logger.info(f" MAE: {overall_results['mae']:.4f}")
    logger.info(f" Correlation: {overall_results['correlation']:.4f}")

    # Persist the report. Undefined metrics (NaN, e.g. correlation of a
    # constant series) are written as null because bare NaN is not valid JSON;
    # allow_nan=False guards against any that slip through.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(_json_safe(results), f, indent=2, allow_nan=False)
    logger.success(f"\nSaved results to {output_path}")
# Run the Typer CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    app()