diff --git a/lnp_ml/interpretability/token_importance.py b/lnp_ml/interpretability/token_importance.py index aa49ebd..0c851b4 100644 --- a/lnp_ml/interpretability/token_importance.py +++ b/lnp_ml/interpretability/token_importance.py @@ -211,7 +211,13 @@ class _ReplacingDescWrapper(nn.Module): self.organ_index = organ_index def forward(self, raw_desc: torch.Tensor) -> torch.Tensor: - base = self.base_projected[:raw_desc.size(0)].to(raw_desc.device) + B_base = self.base_projected.size(0) + B_input = raw_desc.size(0) + if B_input > B_base: + repeats = B_input // B_base + base = self.base_projected.repeat(repeats, 1, 1).to(raw_desc.device) + else: + base = self.base_projected[:B_input].to(raw_desc.device) out = self.model.forward_replacing_token(raw_desc, "desc", base, task="biodist") return out[:, self.organ_index] diff --git a/reports/feature_importance/desc/desc_importance_biodist_heart.csv b/reports/feature_importance/desc/desc_importance_biodist_heart.csv new file mode 100644 index 0000000..3fa9db2 --- /dev/null +++ b/reports/feature_importance/desc/desc_importance_biodist_heart.csv @@ -0,0 +1,211 @@ +rank,feature,ig_raw,ig_normalized +0,BCUT2D_MWLOW,0.002101736350044354,0.17008735551384338 +1,MaxEStateIndex,0.0011577271073203695,0.09369145758299209 +2,BCUT2D_LOGPHI,0.0010123775021437054,0.08192874054716677 +3,BCUT2D_CHGLO,0.0009962246580054286,0.08062153826965522 +4,BCUT2D_CHGHI,0.0008375578485922876,0.06778109897272314 +5,BCUT2D_LOGPLOW,0.00046765324545304575,0.037845804881709466 +6,BCUT2D_MRHI,0.00030401865030452814,0.0246033372625895 +7,SPS,0.00025942281062206463,0.020994326818273085 +8,MaxAbsEStateIndex,0.00022488863073315374,0.018199576976305396 +9,FractionCSP3,0.00016728536087201042,0.013537913376384608 +10,MinAbsPartialCharge,0.00014424722248358352,0.01167350443928515 +11,LabuteASA,0.00013647780687358122,0.011044748432393335 +12,Kappa1,0.0001265113212215328,0.010238189994039349 +13,SlogP_VSA2,0.00012410435403994923,0.010043401202992595 +14,MaxAbsPartialCharge,0.00012245749171712098,0.00991012546772764 +15,Chi0v,0.00011064123611391077,0.00895387057515596 +16,VSA_EState8,0.00010297626802444793,0.008333567199612583 +17,Kappa2,0.0001003897642455207,0.008124249038571589 +18,HeavyAtomCount,9.51378832830388e-05,0.007699229723297723 +19,MolMR,9.299966859511213e-05,0.007526190282940406 +20,MolWt,9.024135604502668e-05,0.007302968142201973 +21,BertzCT,8.788770408486368e-05,0.007112493995577612 +22,VSA_EState7,8.227359525555267e-05,0.006658160641955842 +23,Chi1,8.156261718253116e-05,0.006600623272786772 +24,NumHAcceptors,7.992458845605065e-05,0.006468062414553103 +25,SMR_VSA10,7.738483154926764e-05,0.006262527340701606 +26,Chi2n,7.696633601475861e-05,0.006228659725119143 +27,MaxPartialCharge,7.666155476941655e-05,0.00620399467327791 +28,Chi0,7.61215625954581e-05,0.006160294691182237 +29,FpDensityMorgan1,7.54852510160209e-05,0.006108799862239003 +30,NOCount,7.492074803903652e-05,0.006063116292778476 +31,SMR_VSA6,7.250644794369265e-05,0.005867734070538664 +32,Chi3v,7.022790759071145e-05,0.005683338485877451 +33,NumValenceElectrons,6.951203921372518e-05,0.005625405358758528 +34,Ipc,6.872030187055142e-05,0.005561332378834504 +35,MinPartialCharge,6.785631521270847e-05,0.0054914124738813215 +36,Kappa3,6.735493511579459e-05,0.0054508372538783195 +37,ExactMolWt,6.553744291985794e-05,0.005303752943675763 +38,SMR_VSA5,6.548856643802724e-05,0.005299797513423425 +39,Chi1v,6.420830866395614e-05,0.005196190008531958 +40,Chi0n,5.8956787308253886e-05,0.004771199795179546 +41,fr_unbrch_alkane,5.8756837510737503e-05,0.0047550184447954355 +42,FpDensityMorgan2,5.7791576662306105e-05,0.004676902716774991 +43,Chi4n,5.489268715219628e-05,0.004442304095168149 +44,qed,5.4508102487885366e-05,0.004411180786803404 +45,Chi4v,5.1730164702686894e-05,0.0041863704333751925 +46,BalabanJ,5.129378719711625e-05,0.004151055682347228 +47,MolLogP,5.0809601257078055e-05,0.004111871935005968 +48,Chi3n,4.989438861065491e-05,0.0040378064611135585 +49,EState_VSA3,4.8742003608634505e-05,0.00394454732443672 +50,PEOE_VSA7,4.555317693928754e-05,0.003686484939318912 +51,PEOE_VSA14,4.549313779755655e-05,0.0036816261477563405 +52,FpDensityMorgan3,4.48803540213518e-05,0.00363203535488917 +53,PEOE_VSA6,4.466928815046464e-05,0.003614954413305957 +54,SMR_VSA1,4.427342729293711e-05,0.003582918555712839 +55,HeavyAtomMolWt,4.2989875103563254e-05,0.0034790444434579905 +56,fr_ether,4.172796202867719e-05,0.003376921521241232 +57,fr_amide,4.1214185681973556e-05,0.0033353431091180654 +58,SlogP_VSA5,4.053695096988885e-05,0.0032805364911337323 +59,PEOE_VSA1,4.0412788611784604e-05,0.0032704883958320223 +60,PEOE_VSA9,3.996782441732017e-05,0.003234478749268501 +61,Chi2v,3.996698835137207e-05,0.003234411088904657 +62,NumRotatableBonds,3.9930819424518675e-05,0.0032314840437878242 +63,EState_VSA5,3.9515726113709595e-05,0.0031978917601859762 +64,SlogP_VSA3,3.8589005466544067e-05,0.003122894977562344 +65,BCUT2D_MWHI,3.821389938580966e-05,0.003092538743152671 +66,NumHeteroatoms,3.7863845156754436e-05,0.003064209881587716 +67,SMR_VSA3,3.7406390677693566e-05,0.003027189432943059 +68,EState_VSA10,3.7249227295598543e-05,0.0030144706616073535 +69,PEOE_VSA2,3.6816504229379436e-05,0.0029794516536325918 +70,Chi1n,3.67919213199529e-05,0.002977462230908325 +71,TPSA,3.6587265787364306e-05,0.00296090005919324 +72,AvgIpc,3.565292071741898e-05,0.0028852862544070006 +73,fr_NH0,3.52841304442454e-05,0.002855441139770239 +74,fr_C_O,3.468135427637909e-05,0.002806660233279762 +75,EState_VSA2,3.247208857563429e-05,0.0026278708429458173 +76,EState_VSA4,3.236330765959145e-05,0.002619067522615125 +77,fr_ester,3.113756252717879e-05,0.0025198715658522157 +78,fr_NH1,3.083325045400931e-05,0.0024952444827381804 +79,MinAbsEStateIndex,2.642317808610347e-05,0.0021383502668361957 +80,fr_Al_OH,2.629938174091148e-05,0.0021283317918854533 +81,EState_VSA9,2.6119833965623893e-05,0.002113801517293012 +82,fr_C_O_noCOO,2.5898446536013445e-05,0.0020958852822497093 +83,VSA_EState2,2.5086808116752498e-05,0.00203020176663529 +84,PEOE_VSA8,2.3328695132234476e-05,0.0018879228417716123 +85,fr_Al_OH_noTert,2.189198520018945e-05,0.0017716541228255584 +86,NHOHCount,2.161818310276558e-05,0.0017494961225207216 +87,VSA_EState1,2.1408878325334554e-05,0.001732557701063194 +88,PEOE_VSA12,1.8835580016979997e-05,0.001524308220005764 +89,SlogP_VSA1,1.6408452968107534e-05,0.0013278879500560514 +90,EState_VSA8,1.629318028391306e-05,0.001318559269978169 +91,VSA_EState5,1.5311372257862007e-05,0.00123910442742869 +92,BCUT2D_MRLOW,1.5148282212188656e-05,0.0012259060286006797 +93,PEOE_VSA10,1.4544509155503964e-05,0.0011770444468233831 +94,MinEStateIndex,1.4444357413661402e-05,0.0011689394602394344 +95,EState_VSA1,1.4261273924425204e-05,0.0011541230506922641 +96,VSA_EState4,1.3892594289036088e-05,0.001124286889646746 +97,VSA_EState3,1.2692710343035547e-05,0.0010271838028135948 +98,HallKierAlpha,1.2604726731161636e-05,0.0010200635472820796 +99,NumHDonors,1.2441152660811765e-05,0.0010068259777573114 +100,RingCount,1.1516112455901875e-05,0.0009319651883943701 +101,fr_NH2,8.124538253381116e-06,0.0006574950403553194 +102,EState_VSA7,5.4185031912940645e-06,0.00043850356331852906 +103,EState_VSA6,5.305595321847899e-06,0.00042936626075895955 +104,VSA_EState6,5.011425948932639e-06,0.00040555999661395087 +105,PEOE_VSA13,4.4621084668053666e-06,0.00036110534469218204 +106,fr_oxazole,4.25888075162552e-06,0.0003446587220502264 +107,NumAromaticHeterocycles,4.2273110264415804e-06,0.00034210387682869246 +108,fr_SH,3.7866331281078906e-06,0.00030644110763340595 +109,VSA_EState9,3.4081186896022213e-06,0.00027580904483072476 +110,SlogP_VSA10,3.261434198516121e-06,0.00026393829939531265 +111,SMR_VSA4,3.1185691150944425e-06,0.00025237664741458484 +112,NumAliphaticHeterocycles,3.0203063322079028e-06,0.00024442452873602185 +113,NumSaturatedRings,2.9646254304032454e-06,0.00023991843674195216 +114,fr_alkyl_halide,2.904050106202574e-06,0.0002350162535054732 +115,VSA_EState10,2.871416932220895e-06,0.00023237534649330212 +116,NumAliphaticRings,2.8272683116323134e-06,0.00022880252817794236 +117,NumAromaticRings,2.787533896807074e-06,0.00022558693858204827 +118,SlogP_VSA6,2.7487174850092414e-06,0.0002224456402773916 +119,SMR_VSA7,2.671604583997622e-06,0.00021620511947715405 +120,NumAromaticCarbocycles,2.256933458524177e-06,0.0001826470020956666 +121,NumSaturatedHeterocycles,2.2313537428135074e-06,0.00018057691076383428 +122,PEOE_VSA4,2.1054630486215606e-06,0.00017038894629414317 +123,fr_allylic_oxid,2.0904821773222505e-06,0.00016917658833947433 +124,PEOE_VSA5,2.0326389027738164e-06,0.0001644955018644763 +125,SlogP_VSA4,1.9587171211658475e-06,0.000158513228993616 +126,fr_Ar_N,1.9351828427680445e-06,0.00015660866890142156 +127,PEOE_VSA11,1.9313530037556035e-06,0.0001562987312683516 +128,fr_Ndealkylation2,1.7270243747496922e-06,0.00013976301490095316 +129,SlogP_VSA12,1.6786565213306785e-06,0.00013584874645346327 +130,fr_phenol_noOrthoHbond,1.510683563674314e-06,0.00012225518788699181 +131,fr_imidazole,1.4305397298901706e-06,0.00011576938259138549 +132,fr_Ar_OH,1.4057202698759154e-06,0.00011376081652218412 +133,fr_halogen,1.387080916585443e-06,0.00011225238835535063 +134,fr_aniline,1.3650787712172542e-06,0.0001104718192933808 +135,fr_Ar_NH,1.2983241139087514e-06,0.0001050695607610028 +136,fr_morpholine,1.2496330875968022e-06,0.0001011291388796 +137,fr_methoxy,1.206589292710292e-06,9.76457308663293e-05 +138,fr_benzene,1.1679696665266548e-06,9.452035784398546e-05 +139,SMR_VSA9,1.026805082745507e-06,8.309632230925195e-05 +140,fr_thiazole,9.978088562766216e-07,8.074974278710154e-05 +141,fr_Ndealkylation1,9.136898410325335e-07,7.394223772063891e-05 +142,fr_Nhpyrrole,8.837894499617399e-07,7.152248681041951e-05 +143,fr_para_hydroxylation,8.572163228342317e-07,6.937200160767005e-05 +144,PEOE_VSA3,8.571227648601454e-07,6.9364430235362e-05 +145,fr_phenol,7.463953768194904e-07,6.04035876375772e-05 +146,fr_priamide,7.448193993514656e-07,6.0276048405608315e-05 +147,SlogP_VSA11,6.446680845397089e-07,5.217109638000975e-05 +148,fr_piperzine,5.661222110185417e-07,4.581460931946184e-05 +149,fr_bicyclic,5.437687502437398e-07,4.400560933252806e-05 +150,SlogP_VSA8,2.071733696433218e-07,1.6765932879631135e-05 +151,fr_quatN,6.953587692872904e-08,5.627334475084852e-06 +152,fr_nitro,0.0,0.0 +153,fr_lactone,0.0,0.0 +154,fr_isothiocyan,0.0,0.0 +155,fr_lactam,0.0,0.0 +156,fr_sulfone,0.0,0.0 +157,fr_tetrazole,0.0,0.0 +158,fr_term_acetylene,0.0,0.0 +159,fr_ketone_Topliss,0.0,0.0 +160,fr_nitrile,0.0,0.0 +161,fr_thiophene,0.0,0.0 +162,fr_nitroso,0.0,0.0 +163,fr_nitro_arom,0.0,0.0 +164,fr_nitro_arom_nonortho,0.0,0.0 +165,fr_sulfonamd,0.0,0.0 +166,fr_phos_ester,0.0,0.0 +167,fr_thiocyan,0.0,0.0 +168,fr_oxime,0.0,0.0 +169,fr_sulfide,0.0,0.0 +170,fr_pyridine,0.0,0.0 +171,fr_prisulfonamd,0.0,0.0 +172,fr_phos_acid,0.0,0.0 +173,fr_piperdine,0.0,0.0 +174,fr_ketone,0.0,0.0 +175,NumRadicalElectrons,0.0,0.0 +176,fr_isocyan,0.0,0.0 +177,fr_Al_COO,0.0,0.0 +178,fr_HOCCN,0.0,0.0 +179,fr_C_S,0.0,0.0 +180,fr_COO2,0.0,0.0 +181,fr_COO,0.0,0.0 +182,fr_Ar_COO,0.0,0.0 +183,fr_ArN,0.0,0.0 +184,NumSaturatedCarbocycles,0.0,0.0 +185,fr_imide,0.0,0.0 +186,NumAliphaticCarbocycles,0.0,0.0 +187,EState_VSA11,0.0,0.0 +188,SlogP_VSA9,0.0,0.0 +189,SlogP_VSA7,0.0,0.0 +190,SMR_VSA8,0.0,0.0 +191,SMR_VSA2,0.0,0.0 +192,fr_Imine,0.0,0.0 +193,fr_N_O,0.0,0.0 +194,fr_aldehyde,0.0,0.0 +195,fr_alkyl_carbamate,0.0,0.0 +196,fr_amidine,0.0,0.0 +197,fr_aryl_methyl,0.0,0.0 +198,fr_azide,0.0,0.0 +199,fr_azo,0.0,0.0 +200,fr_barbitur,0.0,0.0 +201,fr_benzodiazepine,0.0,0.0 +202,fr_diazo,0.0,0.0 +203,fr_dihydropyridine,0.0,0.0 +204,fr_epoxide,0.0,0.0 +205,fr_furan,0.0,0.0 +206,fr_guanido,0.0,0.0 +207,fr_hdrzine,0.0,0.0 +208,fr_hdrzone,0.0,0.0 +209,fr_urea,0.0,0.0 diff --git a/reports/feature_importance/desc/desc_importance_biodist_heart.png b/reports/feature_importance/desc/desc_importance_biodist_heart.png new file mode 100644 index 0000000..c27e9bf Binary files /dev/null and b/reports/feature_importance/desc/desc_importance_biodist_heart.png differ diff --git a/reports/feature_importance/desc/desc_importance_biodist_kidney.csv b/reports/feature_importance/desc/desc_importance_biodist_kidney.csv new file mode 100644 index 0000000..82af60a --- /dev/null +++ b/reports/feature_importance/desc/desc_importance_biodist_kidney.csv @@ -0,0 +1,211 @@ +rank,feature,ig_raw,ig_normalized +0,BCUT2D_MWLOW,0.003104377721682017,0.18628353914049167 +1,MaxEStateIndex,0.0015123247756754208,0.09074965638847102 +2,BCUT2D_LOGPHI,0.001449630986032521,0.08698760741638875 +3,BCUT2D_CHGLO,0.0012579787374850176,0.07548718371011433 +4,BCUT2D_CHGHI,0.0010274615229321393,0.06165460069039997 +5,BCUT2D_LOGPLOW,0.0006267871472833137,0.0376114437583469 +6,SPS,0.00042227406666463935,0.02533928364326614 +7,MaxAbsEStateIndex,0.00041232135830561125,0.024742054213289313 +8,BCUT2D_MRHI,0.00037760332684396683,0.022658738859137793 +9,MinAbsPartialCharge,0.0002596331792549909,0.015579736696381137 +10,MaxAbsPartialCharge,0.00018251622294492736,0.010952200733586727 +11,LabuteASA,0.0001749106295198609,0.010495813983163398 +12,Kappa1,0.00016567690248506313,0.009941728267534894 +13,FractionCSP3,0.00016208943056915018,0.009726455828103364 +14,VSA_EState8,0.00015908785092871167,0.009546340865118132 +15,MolWt,0.0001368259916086038,0.00821047960280032 +16,Chi0v,0.0001357445572378215,0.008145586268290645 +17,Kappa2,0.0001253411485919149,0.007521311790301468 +18,SlogP_VSA2,0.00011663676003994146,0.006998989943256052 +19,Chi2n,0.00011647015488722845,0.006988992513732686 +20,VSA_EState7,0.00011433353970097238,0.006860781234577697 +21,Chi3v,0.00011165430044127369,0.006700008862061636 +22,NumHAcceptors,0.00011128654541754772,0.006677941087615925 +23,NumValenceElectrons,0.00011080404782947022,0.006648987987706057 +24,MinPartialCharge,0.00011038662691365925,0.006623939925749729 +25,HeavyAtomCount,0.00010760332023781593,0.0064569228084485125 +26,MolMR,0.00010452116404147556,0.0062719727102604 +27,BertzCT,0.00010214141396787338,0.006129171702868726 +28,ExactMolWt,9.979859480952278e-05,0.00598858679873946 +29,Chi1,9.06486949176258e-05,0.005439531275392819 +30,MaxPartialCharge,8.998715857455167e-05,0.0053998346462110006 +31,SMR_VSA5,8.906866674929658e-05,0.005344718960163807 +32,Chi4n,8.860956183704693e-05,0.005317169578111062 +33,NOCount,8.770146052783496e-05,0.005262677392898992 +34,Chi0n,8.759702711769152e-05,0.005256410686012728 +35,Ipc,8.678238406433724e-05,0.005207526624625755 +36,HeavyAtomMolWt,8.385073185732076e-05,0.005031607777883046 +37,FpDensityMorgan1,8.08302376202996e-05,0.0048503578119089 +38,FpDensityMorgan2,7.930428519628248e-05,0.004758790404978823 +39,SMR_VSA6,7.928077093451343e-05,0.004757379391147606 +40,Chi0,7.233398658897932e-05,0.004340525615753708 +41,Chi1v,7.153603209714887e-05,0.0042926429802828256 +42,Kappa3,6.875684363679099e-05,0.004125872983604274 +43,TPSA,6.797192644463285e-05,0.004078772673784948 +44,EState_VSA3,6.548800506792405e-05,0.003929720687691796 +45,fr_unbrch_alkane,6.408726598462923e-05,0.0038456669232203814 +46,PEOE_VSA6,6.219732815898181e-05,0.0037322579445197065 +47,BalabanJ,6.178383433494838e-05,0.003707445534478331 +48,Chi4v,6.17431378988754e-05,0.0037050034746448483 +49,NumHeteroatoms,6.0596451475873126e-05,0.0036361945782050565 +50,fr_ether,5.8926466134383145e-05,0.003535984227656303 +51,PEOE_VSA7,5.871099735165116e-05,0.0035230546517412108 +52,SMR_VSA10,5.721108958871092e-05,0.003433050099616859 +53,EState_VSA5,5.710580714623577e-05,0.0034267324450812505 +54,qed,5.5253244941168484e-05,0.003315566254953181 +55,SMR_VSA1,5.525285464499349e-05,0.0033155428345580775 +56,MolLogP,5.517774179251296e-05,0.003311035558301943 +57,Chi1n,5.346337647519089e-05,0.003208162110763661 +58,fr_amide,5.0810349358816544e-05,0.0030489626431144307 +59,fr_NH0,5.029127108185271e-05,0.003017814455879219 +60,PEOE_VSA2,4.9788393991850435e-05,0.0029876384488089396 +61,PEOE_VSA14,4.9700593096356385e-05,0.0029823698046494026 +62,EState_VSA10,4.9623657018031535e-05,0.0029777531225820613 +63,fr_NH1,4.943402583089984e-05,0.002966373976151657 +64,fr_C_O,4.935291115736899e-05,0.002961506550272384 +65,Chi2v,4.927364457811384e-05,0.0029567500224774324 +66,SlogP_VSA3,4.904151618216947e-05,0.002942820757739605 +67,PEOE_VSA1,4.8714844505866376e-05,0.0029232182604102125 +68,PEOE_VSA9,4.850711395427413e-05,0.002910753030400366 +69,FpDensityMorgan3,4.8287432116560836e-05,0.002897570642855094 +70,SlogP_VSA5,4.749844940366094e-05,0.0028502263744520774 +71,BCUT2D_MWHI,4.5015583317308974e-05,0.0027012377128767897 +72,VSA_EState2,4.2136145461009934e-05,0.0025284520783001225 +73,EState_VSA4,4.1831934943235935e-05,0.0025101974015258035 +74,NHOHCount,4.088910292389298e-05,0.002453621140154162 +75,EState_VSA2,4.0745274960436076e-05,0.0024449904951546857 +76,NumRotatableBonds,4.0335993844433405e-05,0.0024204308759241244 +77,EState_VSA9,3.864510273664106e-05,0.0023189660388133622 +78,SMR_VSA3,3.708782131451258e-05,0.0022255186813201987 +79,MinAbsEStateIndex,3.499441478576786e-05,0.0020999002121787937 +80,fr_C_O_noCOO,3.491279810737572e-05,0.002095002662632039 +81,Chi3n,3.4568125280744395e-05,0.002074319975231599 +82,fr_ester,3.4514666934451386e-05,0.002071112114965592 +83,AvgIpc,3.439865014138093e-05,0.002064150327209563 +84,PEOE_VSA8,3.14926107017546e-05,0.0018897684187470418 +85,fr_Al_OH,3.0567045547130047e-05,0.0018342282853720483 +86,VSA_EState1,2.7665609521926672e-05,0.0016601226127312342 +87,EState_VSA8,2.7058888674368435e-05,0.0016237152818951435 +88,NumHDonors,2.6567371350402927e-05,0.0015942209371774688 +89,HallKierAlpha,2.393376536217566e-05,0.0014361868678172652 +90,PEOE_VSA12,2.3528662497188035e-05,0.0014118779717448634 +91,SlogP_VSA1,2.3160191294889317e-05,0.0013897672217687404 +92,BCUT2D_MRLOW,2.2619810326981295e-05,0.0013573407298238334 +93,PEOE_VSA10,2.174594561475292e-05,0.0013049029706597558 +94,MinEStateIndex,2.171402321594237e-05,0.0013029874120643085 +95,VSA_EState5,1.945926401131051e-05,0.0011676866973301346 +96,VSA_EState3,1.7815663362081474e-05,0.001069059605744739 +97,RingCount,1.7296870366910907e-05,0.001037928537335542 +98,fr_Al_OH_noTert,1.7254405637991995e-05,0.0010353803680401607 +99,EState_VSA1,1.540727359615665e-05,0.0009245400242219298 +100,VSA_EState4,1.1085831784282549e-05,0.0006652244553453972 +101,fr_NH2,1.0125941939168646e-05,0.0006076246097196683 +102,fr_oxazole,8.353549521899545e-06,0.0005012691459728896 +103,NumAromaticHeterocycles,7.922694708598151e-06,0.0004754149598289392 +104,NumAliphaticRings,7.896368517464427e-06,0.0004738352113266139 +105,EState_VSA7,7.839143955943844e-06,0.00047040135282049077 +106,NumAliphaticHeterocycles,7.703187421353628e-06,0.0004622430464855839 +107,VSA_EState6,7.5968285508048484e-06,0.0004558607990269605 +108,PEOE_VSA11,7.095189538236204e-06,0.0004257590849283305 +109,EState_VSA6,6.8495818720998e-06,0.00041102097333566777 +110,NumSaturatedHeterocycles,5.594313837886118e-06,0.000335696450050352 +111,SMR_VSA4,5.5013434416869925e-06,0.0003301176010854512 +112,VSA_EState9,5.294942309201807e-06,0.0003177321451619004 +113,PEOE_VSA13,5.223063149962858e-06,0.00031341891224570117 +114,fr_Ar_N,5.00394824555643e-06,0.00030027054451126707 +115,SlogP_VSA6,4.863456159041741e-06,0.00029184007456091313 +116,NumAromaticRings,4.83394740936262e-06,0.0002900693511442861 +117,fr_SH,4.763492589616484e-06,0.0002858415881758338 +118,NumSaturatedRings,4.349624416005862e-06,0.0002610067146424593 +119,PEOE_VSA5,4.314136319399025e-06,0.00025887719020118116 +120,SlogP_VSA11,4.3018552536705685e-06,0.00025814024367166083 +121,fr_Ar_NH,4.155014610570978e-06,0.0002493288176344205 +122,NumAromaticCarbocycles,4.09878174866094e-06,0.00024595446777375836 +123,fr_Ar_OH,4.027589041990684e-06,0.00024168242662783092 +124,SMR_VSA7,3.941698294486318e-06,0.00023652840419274317 +125,SlogP_VSA10,3.5797830780572347e-06,0.00021481105745547896 +126,fr_alkyl_halide,3.4836800117910426e-06,0.00020904422722045862 +127,SlogP_VSA12,3.4529046591289926e-06,0.00020719749910739986 +128,fr_halogen,3.4441821589920563e-06,0.0002066740904434627 +129,fr_phenol,3.3602829721144243e-06,0.00020163957503851814 +130,VSA_EState10,3.170845893392136e-06,0.00019027207641798835 +131,fr_Ndealkylation2,2.87699442647898e-06,0.00017263901235626433 +132,SMR_VSA9,2.860452390882169e-06,0.000171646378981131 +133,fr_benzene,2.7963278552869737e-06,0.00016779847563065173 +134,fr_allylic_oxid,2.7382497689250845e-06,0.00016431339989439827 +135,PEOE_VSA4,2.3554899181549234e-06,0.00014134523492389357 +136,fr_piperzine,2.2871964499741387e-06,0.00013724716758368533 +137,PEOE_VSA3,2.1122763989480423e-06,0.00012675078824679153 +138,SlogP_VSA4,1.8967659693447366e-06,0.00011381871323935988 +139,fr_imidazole,1.8629239766543043e-06,0.00011178796610253125 +140,fr_aniline,1.8593937071035018e-06,0.00011157612618967197 +141,fr_para_hydroxylation,1.772413521584502e-06,0.0001063567301476218 +142,fr_phenol_noOrthoHbond,1.6546242407603233e-06,9.928858120701785e-05 +143,fr_morpholine,1.5149275937373432e-06,9.090584297521503e-05 +144,fr_Nhpyrrole,1.3068024548780294e-06,7.841693507588484e-05 +145,fr_priamide,1.1961331202739558e-06,7.177603078760125e-05 +146,fr_bicyclic,8.201167813013223e-07,4.9212521872674255e-05 +147,fr_Ndealkylation1,7.660094963999547e-07,4.596572092628411e-05 +148,fr_methoxy,7.009840983402403e-07,4.206375976995301e-05 +149,fr_thiazole,6.240670051229583e-07,3.7448216936733935e-05 +150,SlogP_VSA8,4.020349535043417e-07,2.412480075278773e-05 +151,fr_quatN,9.197851898785228e-10,5.5193296622018114e-08 +152,fr_nitrile,0.0,0.0 +153,fr_nitro_arom_nonortho,0.0,0.0 +154,fr_nitroso,0.0,0.0 +155,fr_lactone,0.0,0.0 +156,fr_oxime,0.0,0.0 +157,fr_nitro_arom,0.0,0.0 +158,fr_lactam,0.0,0.0 +159,fr_nitro,0.0,0.0 +160,fr_piperdine,0.0,0.0 +161,fr_phos_acid,0.0,0.0 +162,fr_phos_ester,0.0,0.0 +163,fr_ketone,0.0,0.0 +164,fr_prisulfonamd,0.0,0.0 +165,fr_pyridine,0.0,0.0 +166,fr_sulfide,0.0,0.0 +167,fr_sulfonamd,0.0,0.0 +168,fr_sulfone,0.0,0.0 +169,fr_term_acetylene,0.0,0.0 +170,fr_tetrazole,0.0,0.0 +171,fr_thiocyan,0.0,0.0 +172,fr_thiophene,0.0,0.0 +173,NumRadicalElectrons,0.0,0.0 +174,fr_ketone_Topliss,0.0,0.0 +175,NumSaturatedCarbocycles,0.0,0.0 +176,fr_isothiocyan,0.0,0.0 +177,SlogP_VSA7,0.0,0.0 +178,fr_aldehyde,0.0,0.0 +179,SMR_VSA8,0.0,0.0 +180,fr_N_O,0.0,0.0 +181,fr_Imine,0.0,0.0 +182,fr_HOCCN,0.0,0.0 +183,fr_C_S,0.0,0.0 +184,fr_COO2,0.0,0.0 +185,fr_isocyan,0.0,0.0 +186,fr_COO,0.0,0.0 +187,SlogP_VSA9,0.0,0.0 +188,fr_Ar_COO,0.0,0.0 +189,fr_ArN,0.0,0.0 +190,EState_VSA11,0.0,0.0 +191,fr_Al_COO,0.0,0.0 +192,fr_alkyl_carbamate,0.0,0.0 +193,fr_amidine,0.0,0.0 +194,fr_aryl_methyl,0.0,0.0 +195,fr_azide,0.0,0.0 +196,fr_azo,0.0,0.0 +197,fr_barbitur,0.0,0.0 +198,fr_benzodiazepine,0.0,0.0 +199,fr_diazo,0.0,0.0 +200,fr_dihydropyridine,0.0,0.0 +201,fr_epoxide,0.0,0.0 +202,SMR_VSA2,0.0,0.0 +203,NumAliphaticCarbocycles,0.0,0.0 +204,fr_furan,0.0,0.0 +205,fr_guanido,0.0,0.0 +206,fr_hdrzine,0.0,0.0 +207,fr_hdrzone,0.0,0.0 +208,fr_imide,0.0,0.0 +209,fr_urea,0.0,0.0 diff --git a/reports/feature_importance/desc/desc_importance_biodist_kidney.png b/reports/feature_importance/desc/desc_importance_biodist_kidney.png new file mode 100644 index 0000000..024b867 Binary files /dev/null and b/reports/feature_importance/desc/desc_importance_biodist_kidney.png differ diff --git a/reports/feature_importance/desc/desc_importance_biodist_liver.csv b/reports/feature_importance/desc/desc_importance_biodist_liver.csv new file mode 100644 index 0000000..d13debc --- /dev/null +++ b/reports/feature_importance/desc/desc_importance_biodist_liver.csv @@ -0,0 +1,211 @@ +rank,feature,ig_raw,ig_normalized +0,BCUT2D_MWLOW,0.053332374013140685,0.25374106216254844 +1,BCUT2D_CHGHI,0.015341146022911046,0.0729890382469207 +2,BCUT2D_CHGLO,0.013885965751178925,0.06606568269375675 +3,BCUT2D_LOGPHI,0.013335606504995927,0.06344722172550799 +4,MaxEStateIndex,0.008946024130596997,0.042562771806673505 +5,SPS,0.008333530961741106,0.0396486943798088 +6,BCUT2D_MRHI,0.005276272018388418,0.025103080276804083 +7,BCUT2D_LOGPLOW,0.004162076472863581,0.019802038153522775 +8,MaxAbsEStateIndex,0.003124883096615993,0.014867351599120048 +9,MinAbsPartialCharge,0.002695026387500681,0.012822209226088944 +10,MolWt,0.002512539076247597,0.011953983780562442 +11,FractionCSP3,0.0023864069575204737,0.01135388115301492 +12,MaxPartialCharge,0.0020888654791870738,0.009938258988302553 +13,MaxAbsPartialCharge,0.0020580464139418023,0.009791630181786921 +14,Kappa1,0.0019512299468754705,0.009283426219158447 +15,LabuteASA,0.0019494957758459945,0.009275175500769608 +16,HeavyAtomMolWt,0.0017156926901091079,0.008162803430155581 +17,SlogP_VSA2,0.0016252978385722663,0.007732729088493374 +18,Chi0,0.0016084974742613157,0.0076527974828999685 +19,qed,0.0014944762698370757,0.007110315321642134 +20,fr_unbrch_alkane,0.0014858090426795984,0.007069078990696121 +21,Chi1,0.0014671115047414688,0.006980121144284173 +22,SMR_VSA5,0.0014290820456453926,0.006799187227070438 +23,NumHeteroatoms,0.0014133263110247926,0.006724225688009806 +24,FpDensityMorgan3,0.0013163286896564494,0.006262737146975052 +25,PEOE_VSA6,0.0012802885958943,0.006091268093874351 +26,MinPartialCharge,0.0012766542140054927,0.00607397668433494 +27,NumHAcceptors,0.0012755673085782615,0.006068805481239651 +28,Chi0v,0.0012704247434828602,0.006044338542467608 +29,PEOE_VSA2,0.001201912864982489,0.005718377488921552 +30,Kappa3,0.001153117298488931,0.0054862213342405704 +31,fr_C_O,0.001128805602370165,0.005370552836254172 +32,Ipc,0.0011249112968146737,0.005352024779959652 +33,NumValenceElectrons,0.00111939363454917,0.00532577322994318 +34,Chi2v,0.001064882889305516,0.005066425795043904 +35,VSA_EState8,0.001058764883796688,0.00503731797367177 +36,PEOE_VSA1,0.001046492985165465,0.004978931587334076 +37,Chi1v,0.001043374491428188,0.00496409463458472 +38,TPSA,0.001013848464237276,0.0048236177546502855 +39,ExactMolWt,0.0010128318971557677,0.004818781202447344 +40,FpDensityMorgan2,0.0010091990027346546,0.00480149686987834 +41,MolLogP,0.001000211711270274,0.004758737759219316 +42,fr_amide,0.0009882108659633449,0.004701640971548128 +43,Chi2n,0.0009691356328362671,0.004610886153218264 +44,Chi1n,0.0009597190231726626,0.004566084462271141 +45,PEOE_VSA14,0.0009071783337019447,0.004316110021797747 +46,Chi0n,0.0008487116715382006,0.00403794140033582 +47,NOCount,0.0008181686173350611,0.003892625779972035 +48,BalabanJ,0.0008114755835794854,0.003860782129175408 +49,FpDensityMorgan1,0.0008017798658722243,0.003814652517377844 +50,HeavyAtomCount,0.0007685718631431709,0.0036566577901472382 +51,EState_VSA5,0.0007507167756725658,0.0035717080960143604 +52,Chi3v,0.0007497036717485959,0.0035668880205816443 +53,Chi4n,0.0007475180372910496,0.0035564893608741235 +54,fr_NH0,0.0007390684302786517,0.003516288407928476 +55,SlogP_VSA3,0.0007282908299223219,0.003465011490060451 +56,SMR_VSA6,0.0007265338136084126,0.003456652079965217 +57,SlogP_VSA5,0.0007110880311044149,0.0033831652095414394 +58,Kappa2,0.0007019222693909374,0.003339556985536006 +59,EState_VSA4,0.0006885115652136152,0.0032757524693248392 +60,Chi4v,0.0006615149573164618,0.0031473098846965656 +61,VSA_EState7,0.0006068811562011739,0.002887376983124345 +62,VSA_EState2,0.0006064971586460924,0.0028855500262468916 +63,PEOE_VSA7,0.0005828110496020016,0.002772857902962361 +64,SMR_VSA1,0.000582562341378156,0.0027716746162616494 +65,fr_ester,0.0005808372733423156,0.002763467207463216 +66,MolMR,0.0005777489932616045,0.0027487740031490403 +67,BertzCT,0.0005606361905611596,0.002667355899902425 +68,NumRotatableBonds,0.0005599652028128477,0.0026641635210311455 +69,VSA_EState5,0.0005500547730384193,0.0026170123670841787 +70,PEOE_VSA12,0.0005464093273649953,0.0025996683190393165 +71,fr_ether,0.0005456070069179859,0.0025958510945825776 +72,EState_VSA3,0.0005396609192486873,0.0025675612119580448 +73,EState_VSA2,0.0005325340675702121,0.0025336535724007716 +74,PEOE_VSA9,0.000509755000727925,0.002425276911459155 +75,BCUT2D_MWHI,0.00048218619599065747,0.0022941119684760703 +76,fr_C_O_noCOO,0.0004804453515628628,0.0022858294998565495 +77,fr_Al_OH,0.0004646957696309545,0.0022108972336305267 +78,SMR_VSA3,0.0004324500750859148,0.002057480909391679 +79,VSA_EState1,0.000432198622437696,0.002056284565459438 +80,VSA_EState6,0.000423522852725143,0.0020150075913386452 +81,Chi3n,0.00039949166333677687,0.0019006736687772576 +82,AvgIpc,0.0003865750104769076,0.0018392197155847633 +83,EState_VSA10,0.0003863243619896519,0.0018380271976336587 +84,MinEStateIndex,0.0003662380502983998,0.0017424619399871394 +85,EState_VSA9,0.0003591933570463285,0.0017089451880805118 +86,SMR_VSA10,0.0003462139201856692,0.0016471925255329201 +87,fr_NH1,0.0003401252878783136,0.0016182244539371393 +88,BCUT2D_MRLOW,0.00033646228683880633,0.0016007968821917286 +89,PEOE_VSA11,0.0003207694350076865,0.0015261345224961574 +90,SlogP_VSA1,0.0003042975198550516,0.001447765589479016 +91,EState_VSA8,0.00030400615064765484,0.0014463793333159562 +92,NumAliphaticRings,0.0003028899070212873,0.0014410685469759955 +93,NumHDonors,0.00029951047212071845,0.0014249901064968928 +94,PEOE_VSA8,0.00029044374402162294,0.0013818530577385407 +95,HallKierAlpha,0.00028614997078980973,0.0013614244418989952 +96,MinAbsEStateIndex,0.0002857294316334796,0.0013594236299309802 +97,EState_VSA1,0.0002797949851097474,0.0013311891327396938 +98,NHOHCount,0.0002466239077499931,0.0011733700864646063 +99,PEOE_VSA10,0.0002457454007319216,0.0011691903868354884 +100,NumSaturatedHeterocycles,0.00019380007825020687,0.0009220485420408278 +101,VSA_EState3,0.00019264619355530415,0.0009165586696412683 +102,EState_VSA6,0.00019191891894805232,0.000913098493064796 +103,fr_SH,0.00019035038058313577,0.0009056358102549407 +104,fr_phenol,0.00018009022719279934,0.000856820765596078 +105,NumAliphaticHeterocycles,0.00016171843692534813,0.0007694127388102828 +106,RingCount,0.0001567618249207889,0.0007458305147290033 +107,fr_NH2,0.00015534255742569338,0.0007390780224883693 +108,fr_oxazole,0.00015391090551865192,0.0007322666085533693 +109,fr_Al_OH_noTert,0.000152980890755542,0.000727841848955002 +110,PEOE_VSA13,0.00015012118563561245,0.0007142361427019143 +111,EState_VSA7,0.00014118608762483207,0.000671725354428674 +112,NumAromaticRings,0.00013902020377047083,0.0006614206627682029 +113,SlogP_VSA12,0.00012994908488620768,0.0006182627238373659 +114,SlogP_VSA11,0.00012837067979377874,0.0006107530977971485 +115,VSA_EState4,0.00012391427119038247,0.0005895507066908108 +116,fr_phenol_noOrthoHbond,0.00010409149134352364,0.0004952392625365153 +117,VSA_EState10,9.740767597444317e-05,0.0004634394703384224 +118,SlogP_VSA8,9.561795532435611e-05,0.0004549244618256672 +119,fr_benzene,9.294710454943059e-05,0.0004422172736487395 +120,NumAromaticHeterocycles,8.770310683156147e-05,0.0004172677457956903 +121,PEOE_VSA4,8.639745691495415e-05,0.00041105581537289015 +122,NumAromaticCarbocycles,8.553707886181716e-05,0.00040696236847306654 +123,fr_Ndealkylation2,7.995981939451655e-05,0.00038042727103224577 +124,PEOE_VSA5,7.768856287315365e-05,0.00036962124462074124 +125,SMR_VSA7,7.611640805215656e-05,0.00036214135568751125 +126,fr_Ar_OH,7.097060037290515e-05,0.00033765898957540573 +127,fr_imidazole,6.60447357968993e-05,0.00031422305347256825 +128,VSA_EState9,6.574292361618105e-05,0.00031278711245688845 +129,SMR_VSA4,6.198426100870348e-05,0.00029490440875243643 +130,fr_priamide,5.8950847748806914e-05,0.00028047224598476243 +131,NumSaturatedRings,5.7748337234966836e-05,0.0002747510250436464 +132,fr_Nhpyrrole,5.633497515306997e-05,0.0002680266291674677 +133,SlogP_VSA6,5.5943427547820704e-05,0.00026616375118609996 +134,fr_allylic_oxid,5.099672095612746e-05,0.000242628654389669 +135,fr_Ar_N,4.788092027415579e-05,0.0002278045144716619 +136,SlogP_VSA10,4.532377244070704e-05,0.00021563829424666172 +137,fr_morpholine,4.500718837211555e-05,0.00021413207256959268 +138,PEOE_VSA3,3.9860602081127347e-05,0.00018964600203268402 +139,fr_piperzine,3.108773217744888e-05,0.00014790705137159496 +140,fr_aniline,3.065292949543326e-05,0.0001458383773924733 +141,fr_Ndealkylation1,2.9208873459201216e-05,0.00013896794795377108 +142,fr_para_hydroxylation,2.872687568071086e-05,0.00013667473242498182 +143,SMR_VSA9,2.599899842690257e-05,0.0001236962276305051 +144,fr_alkyl_halide,2.358176284619006e-05,0.00011219567219683975 +145,fr_methoxy,2.2889095196242206e-05,0.00010890014619643897 +146,SlogP_VSA4,1.8931971023617852e-05,9.007321585159063e-05 +147,fr_Ar_NH,1.6998330023921577e-05,8.087347310278493e-05 +148,fr_halogen,1.0934707390459273e-05,5.2024390795132584e-05 +149,fr_bicyclic,1.0036306895872215e-05,4.775004336616362e-05 +150,fr_quatN,5.631654705704883e-06,2.6793895325308063e-05 +151,fr_thiazole,3.5727953597668267e-06,1.699839743217297e-05 +152,fr_nitroso,0.0,0.0 +153,SlogP_VSA9,0.0,0.0 +154,fr_nitro_arom,0.0,0.0 +155,fr_nitro,0.0,0.0 +156,fr_oxime,0.0,0.0 +157,fr_nitrile,0.0,0.0 +158,SlogP_VSA7,0.0,0.0 +159,fr_nitro_arom_nonortho,0.0,0.0 +160,fr_COO2,0.0,0.0 +161,fr_phos_acid,0.0,0.0 +162,fr_phos_ester,0.0,0.0 +163,fr_piperdine,0.0,0.0 +164,fr_lactam,0.0,0.0 +165,EState_VSA11,0.0,0.0 +166,fr_prisulfonamd,0.0,0.0 +167,fr_pyridine,0.0,0.0 +168,fr_sulfide,0.0,0.0 +169,fr_sulfonamd,0.0,0.0 +170,fr_sulfone,0.0,0.0 +171,fr_term_acetylene,0.0,0.0 +172,fr_tetrazole,0.0,0.0 +173,fr_thiocyan,0.0,0.0 +174,fr_thiophene,0.0,0.0 +175,NumRadicalElectrons,0.0,0.0 +176,fr_lactone,0.0,0.0 +177,NumAliphaticCarbocycles,0.0,0.0 +178,fr_ketone_Topliss,0.0,0.0 +179,fr_ketone,0.0,0.0 +180,fr_C_S,0.0,0.0 +181,fr_HOCCN,0.0,0.0 +182,fr_Imine,0.0,0.0 +183,fr_N_O,0.0,0.0 +184,fr_Ar_COO,0.0,0.0 +185,fr_ArN,0.0,0.0 +186,SMR_VSA2,0.0,0.0 +187,fr_aldehyde,0.0,0.0 +188,fr_alkyl_carbamate,0.0,0.0 +189,fr_Al_COO,0.0,0.0 +190,NumSaturatedCarbocycles,0.0,0.0 +191,fr_amidine,0.0,0.0 +192,fr_aryl_methyl,0.0,0.0 +193,fr_azide,0.0,0.0 +194,fr_azo,0.0,0.0 +195,fr_barbitur,0.0,0.0 +196,SMR_VSA8,0.0,0.0 +197,fr_benzodiazepine,0.0,0.0 +198,fr_diazo,0.0,0.0 +199,fr_dihydropyridine,0.0,0.0 +200,fr_epoxide,0.0,0.0 +201,fr_furan,0.0,0.0 +202,fr_guanido,0.0,0.0 +203,fr_hdrzine,0.0,0.0 +204,fr_hdrzone,0.0,0.0 +205,fr_COO,0.0,0.0 +206,fr_imide,0.0,0.0 +207,fr_isocyan,0.0,0.0 +208,fr_isothiocyan,0.0,0.0 +209,fr_urea,0.0,0.0 diff --git a/reports/feature_importance/desc/desc_importance_biodist_liver.png b/reports/feature_importance/desc/desc_importance_biodist_liver.png new file mode 100644 index 0000000..cd41221 Binary files /dev/null and b/reports/feature_importance/desc/desc_importance_biodist_liver.png differ diff --git a/reports/feature_importance/desc/desc_importance_biodist_lung.csv b/reports/feature_importance/desc/desc_importance_biodist_lung.csv new file mode 100644 index 0000000..548b46e --- /dev/null +++ b/reports/feature_importance/desc/desc_importance_biodist_lung.csv @@ -0,0 +1,211 @@ +rank,feature,ig_raw,ig_normalized +0,BCUT2D_MWLOW,0.014726849987524234,0.16303564830118875 +1,BCUT2D_CHGLO,0.005983707201308257,0.0662434657537913 +2,MaxEStateIndex,0.005973841242521892,0.06613424327999612 +3,BCUT2D_CHGHI,0.005914109336186457,0.06547297287377121 +4,BCUT2D_LOGPHI,0.004304677330406679,0.04765553121576051 +5,SPS,0.003359709794079829,0.03719413621009813 +6,BCUT2D_LOGPLOW,0.0027714327666110315,0.03068153326817088 +7,MaxAbsEStateIndex,0.0022804269717924294,0.0252457850840238 +8,BCUT2D_MRHI,0.002105488260146048,0.023309101659503 +9,VSA_EState8,0.0013168601086800868,0.014578483635163635 +10,HeavyAtomMolWt,0.001273657181813157,0.014100199603193071 +11,MolWt,0.001179703006739548,0.01306006679429601 +12,MinAbsPartialCharge,0.0011484672769491167,0.012714267287893904 +13,FractionCSP3,0.001137112911060617,0.012588567195528689 +14,MaxPartialCharge,0.000986173060166036,0.01091756650862485 +15,MaxAbsPartialCharge,0.0008377721056063184,0.009274673027965025 +16,Kappa1,0.0007683137189186859,0.008505724263418226 +17,SlogP_VSA2,0.0007636962766158913,0.008454606223921171 +18,fr_C_O,0.0007623506351057147,0.008439709111763548 +19,Kappa3,0.0007335238706367486,0.008120578392186384 +20,Chi2v,0.0007194789348853568,0.007965091970615137 +21,MolLogP,0.0007183752403415092,0.00795287336611824 +22,Chi0,0.0006349145232038025,0.0070289098479351065 +23,SMR_VSA5,0.0006291973363537743,0.006965616932929996 +24,BalabanJ,0.0006077507422251615,0.006728189419200112 +25,SMR_VSA6,0.0006071505645776395,0.0067215450687828455 +26,Chi1,0.0005972603786934543,0.006612054385519732 +27,NumHAcceptors,0.0005964485506636349,0.006603066930003935 +28,Kappa2,0.000596009100102834,0.006598201931233853 +29,HeavyAtomCount,0.0005897211709304583,0.006528590533687284 +30,fr_ether,0.0005879737616570559,0.006509245595429747 +31,fr_NH0,0.0005831658318947979,0.006456018737925232 +32,FpDensityMorgan1,0.0005671145573399219,0.006278320862593338 +33,PEOE_VSA2,0.0005600679684566087,0.006200310616826175 +34,BertzCT,0.000532888789850104,0.005899419726502776 +35,Chi3v,0.0005237404204482026,0.005798141426146538 +36,fr_amide,0.000518087918834696,0.005735564618080845 +37,Chi4n,0.0005115182808469074,0.005662834523773468 +38,fr_NH1,0.0005044510123743243,0.005584595341726917 +39,SMR_VSA1,0.0004918257258541867,0.005444825345119183 +40,SlogP_VSA5,0.0004812487617386174,0.005327731587587797 +41,LabuteASA,0.00047486266486935266,0.0052570334108509665 +42,Chi4v,0.000471670109563622,0.005221689781734887 +43,Chi0v,0.0004688625485931293,0.005190608286143016 +44,ExactMolWt,0.0004680029606255084,0.005181092097569648 +45,AvgIpc,0.0004630921788864821,0.0051267266029006555 +46,PEOE_VSA14,0.00045694816689562464,0.005058708460599464 +47,NOCount,0.0004557883189658625,0.005045868202206138 +48,qed,0.00045283487671843076,0.005013171707576393 +49,FpDensityMorgan3,0.00045112538421526223,0.0049942465322159995 +50,MinPartialCharge,0.0004426484371238668,0.004900401306262634 +51,FpDensityMorgan2,0.0004415663679115627,0.004888422108016967 +52,PEOE_VSA6,0.0004407714103593667,0.004879621419478414 +53,SMR_VSA10,0.0004385381540820903,0.004854897844151924 +54,NumValenceElectrons,0.0004215732652334272,0.004667085674262063 +55,TPSA,0.0004134462152236791,0.004577114032788297 +56,Chi0n,0.00040604726817358727,0.00449520295675508 +57,PEOE_VSA7,0.00040484568625227455,0.004481900676383231 +58,MolMR,0.0004030156280970024,0.004461640762143157 +59,PEOE_VSA9,0.0003831554908884336,0.004241776341177876 +60,Chi2n,0.0003732035852247278,0.004131602380480982 +61,VSA_EState2,0.0003729404150090489,0.004128688917876072 +62,Ipc,0.0003711811605269887,0.004109212845582782 +63,VSA_EState7,0.00036041070472384913,0.003989977011317361 +64,EState_VSA9,0.0003406340798002362,0.0037710371247589914 +65,PEOE_VSA8,0.00032215749922579097,0.0035664895606230275 +66,Chi1v,0.0003190714228384238,0.003532324721234589 +67,fr_C_O_noCOO,0.00030916359414557816,0.003422638720795688 +68,Chi3n,0.0003042855278450862,0.0033686354069552794 +69,NumRotatableBonds,0.0002884544826740879,0.0031933756117563327 +70,Chi1n,0.0002852688340188245,0.003158108443678253 +71,fr_unbrch_alkane,0.00028413932989275096,0.0031456041105989273 +72,NumHeteroatoms,0.00028258950722529375,0.0031284465824408477 +73,fr_NH2,0.00028062106111530265,0.003106654625032425 +74,VSA_EState1,0.000277502136564881,0.0030721261354700775 +75,EState_VSA3,0.0002702121199411247,0.0029914209889256773 +76,SMR_VSA3,0.0002581342579650695,0.0028577113321405145 +77,fr_ester,0.0002520315781646559,0.0027901507636226125 +78,BCUT2D_MRLOW,0.00024726827619572854,0.002737417964333132 +79,PEOE_VSA1,0.00024153148256402318,0.0026739079897148293 +80,MinAbsEStateIndex,0.00024060646903329276,0.002663667498313222 +81,VSA_EState5,0.0002310046458168792,0.0025573691742121723 +82,NumHDonors,0.0002288127734575211,0.0025331037453251984 +83,EState_VSA10,0.00020485750440433082,0.002267903595692055 +84,EState_VSA2,0.00019720229131005691,0.002183155490648139 +85,BCUT2D_MWHI,0.00019455510307497584,0.002153849423807821 +86,HallKierAlpha,0.00019101569345292494,0.002114665895570392 +87,SlogP_VSA3,0.00019024641813881194,0.00210614952583316 +88,PEOE_VSA12,0.0001879901443133234,0.002081171131526492 +89,SMR_VSA4,0.00018017947286169306,0.0019947020030392114 +90,EState_VSA4,0.00017487012445828573,0.0019359241204818622 +91,SlogP_VSA1,0.0001544305276479841,0.0017096447111169645 +92,EState_VSA5,0.00014937602583444504,0.001653688143303211 +93,MinEStateIndex,0.00014794413838120743,0.0016378362333950706 +94,EState_VSA8,0.00013287128988858125,0.0014709701603500755 +95,NHOHCount,0.0001325853950941652,0.0014678051221244507 +96,fr_Al_OH_noTert,0.000128723110860007,0.0014250471654281525 +97,RingCount,0.0001261160017222519,0.0013961847998288602 +98,fr_Al_OH,0.00012503029004571896,0.0013841652771745471 +99,VSA_EState4,0.00011345160273392428,0.0012559818031828666 +100,VSA_EState6,9.648874915207688e-05,0.0010681921649983632 +101,fr_SH,9.287757318595466e-05,0.0010282141374320362 +102,NumAliphaticRings,9.122970734158816e-05,0.001009971208599511 +103,SlogP_VSA12,8.133196165453642e-05,0.000900396833483597 +104,EState_VSA1,7.787925661476869e-05,0.0008621731804262097 +105,VSA_EState10,7.527730412731738e-05,0.0008333679022438501 +106,EState_VSA7,7.226303957837879e-05,0.0007999980658359415 +107,fr_priamide,6.933647167324174e-05,0.0007675990873635816 +108,VSA_EState3,6.752581373213399e-05,0.0007475539459022185 +109,EState_VSA6,6.700054164994797e-05,0.0007417388480010203 +110,PEOE_VSA11,6.171022468909897e-05,0.0006831716556848453 +111,SlogP_VSA4,6.141384994362038e-05,0.0006798905976982334 +112,NumAliphaticHeterocycles,5.68150724511404e-05,0.0006289791895889265 +113,NumAromaticRings,5.078092299774762e-05,0.0005621772958429074 +114,fr_Nhpyrrole,5.04962162670908e-05,0.0005590254102429544 +115,fr_imidazole,5.009304063920676e-05,0.0005545619981808367 +116,NumSaturatedRings,4.891941388834663e-05,0.000541569199425353 +117,fr_thiazole,4.760692269563074e-05,0.0005270390824841667 +118,PEOE_VSA10,4.7138122984220716e-05,0.0005218491698458298 +119,fr_phenol,4.5939244548357274e-05,0.0005085768187869887 +120,PEOE_VSA5,4.484848801994766e-05,0.0004965014463958348 +121,NumSaturatedHeterocycles,3.9724862885263155e-05,0.00043977963920738635 +122,fr_alkyl_halide,3.9223910877958415e-05,0.0004342337800896593 +123,fr_morpholine,3.773931940698095e-05,0.00041779840299691577 +124,NumAromaticCarbocycles,3.677642123774598e-05,0.0004071385044169586 +125,SlogP_VSA10,3.604217717300546e-05,0.00039900995301542463 +126,NumAromaticHeterocycles,3.497050131588376e-05,0.00038714581585896674 +127,fr_Ndealkylation2,3.188217258619586e-05,0.0003529560416004934 +128,PEOE_VSA13,3.163994930284876e-05,0.00035027447493365076 +129,fr_Ar_N,3.106722352227755e-05,0.00034393403424108355 +130,SlogP_VSA6,2.6798901140269738e-05,0.0002966809755558497 +131,PEOE_VSA4,2.627467519129575e-05,0.00029087745901839034 +132,SlogP_VSA8,2.4101717520049498e-05,0.0002668214278261944 +133,fr_methoxy,2.3050563703068464e-05,0.00025518448277949725 +134,SMR_VSA7,2.2765669709381006e-05,0.00025203052405802607 +135,fr_phenol_noOrthoHbond,2.2536665185915026e-05,0.00024949529751746853 +136,PEOE_VSA3,2.0895199675349645e-05,0.0002313232244736215 +137,VSA_EState9,1.9283572716270772e-05,0.00021348148327875042 +138,fr_benzene,1.717865135955054e-05,0.000190178657602749 +139,SMR_VSA9,1.6814687998619967e-05,0.00018614935041502968 +140,SlogP_VSA11,1.653894589888514e-05,0.00018309670901294557 +141,fr_allylic_oxid,1.5657264259274564e-05,0.0001733359293600708 +142,fr_Ar_OH,1.3404273669065547e-05,0.00014839388256782563 +143,fr_Ar_NH,1.2488529249661664e-05,0.0001382560061569037 +144,fr_halogen,1.1228239771194224e-05,0.00012430379557940949 +145,fr_piperzine,9.760095654955483e-06,0.00010805050122295639 +146,fr_aniline,9.218204481233575e-06,0.00010205141934928439 +147,fr_oxazole,9.151243200703294e-06,0.00010131011514698855 +148,fr_Ndealkylation1,5.7394304739144555e-06,6.353916614801821e-05 +149,fr_para_hydroxylation,5.128272086889107e-06,5.6773251921432317e-05 +150,fr_quatN,4.448571954593387e-06,4.924853673705052e-05 +151,fr_bicyclic,2.7881858433087463e-06,3.0867000542079776e-05 +152,fr_nitrile,0.0,0.0 +153,SMR_VSA2,0.0,0.0 +154,fr_nitro_arom,0.0,0.0 +155,fr_nitroso,0.0,0.0 +156,fr_nitro_arom_nonortho,0.0,0.0 +157,fr_oxime,0.0,0.0 +158,fr_lactone,0.0,0.0 +159,fr_nitro,0.0,0.0 +160,fr_Al_COO,0.0,0.0 +161,fr_phos_acid,0.0,0.0 +162,fr_phos_ester,0.0,0.0 +163,fr_piperdine,0.0,0.0 +164,fr_ketone_Topliss,0.0,0.0 +165,fr_prisulfonamd,0.0,0.0 +166,fr_pyridine,0.0,0.0 +167,fr_sulfide,0.0,0.0 +168,fr_sulfonamd,0.0,0.0 +169,fr_sulfone,0.0,0.0 +170,fr_term_acetylene,0.0,0.0 +171,fr_tetrazole,0.0,0.0 +172,fr_thiocyan,0.0,0.0 +173,fr_thiophene,0.0,0.0 +174,NumRadicalElectrons,0.0,0.0 +175,fr_lactam,0.0,0.0 +176,fr_furan,0.0,0.0 +177,fr_ketone,0.0,0.0 +178,fr_amidine,0.0,0.0 +179,fr_ArN,0.0,0.0 +180,fr_Ar_COO,0.0,0.0 +181,fr_COO,0.0,0.0 +182,fr_COO2,0.0,0.0 +183,NumAliphaticCarbocycles,0.0,0.0 +184,fr_C_S,0.0,0.0 +185,fr_HOCCN,0.0,0.0 +186,fr_Imine,0.0,0.0 +187,EState_VSA11,0.0,0.0 +188,SlogP_VSA9,0.0,0.0 +189,fr_N_O,0.0,0.0 +190,SlogP_VSA7,0.0,0.0 +191,fr_aldehyde,0.0,0.0 +192,fr_alkyl_carbamate,0.0,0.0 +193,fr_aryl_methyl,0.0,0.0 +194,fr_isothiocyan,0.0,0.0 +195,fr_azide,0.0,0.0 +196,fr_azo,0.0,0.0 +197,fr_barbitur,0.0,0.0 +198,fr_benzodiazepine,0.0,0.0 +199,fr_diazo,0.0,0.0 +200,fr_dihydropyridine,0.0,0.0 +201,fr_epoxide,0.0,0.0 +202,SMR_VSA8,0.0,0.0 +203,NumSaturatedCarbocycles,0.0,0.0 +204,fr_guanido,0.0,0.0 +205,fr_hdrzine,0.0,0.0 +206,fr_hdrzone,0.0,0.0 +207,fr_imide,0.0,0.0 +208,fr_isocyan,0.0,0.0 +209,fr_urea,0.0,0.0 diff --git a/reports/feature_importance/desc/desc_importance_biodist_lung.png b/reports/feature_importance/desc/desc_importance_biodist_lung.png new file mode 100644 index 0000000..f24571c Binary files /dev/null and b/reports/feature_importance/desc/desc_importance_biodist_lung.png differ diff --git a/reports/feature_importance/desc/desc_importance_biodist_lymph_nodes.csv b/reports/feature_importance/desc/desc_importance_biodist_lymph_nodes.csv new file mode 100644 index 0000000..264caf2 --- /dev/null +++ b/reports/feature_importance/desc/desc_importance_biodist_lymph_nodes.csv @@ -0,0 +1,211 @@ +rank,feature,ig_raw,ig_normalized +0,BCUT2D_MWLOW,0.004274957260089493,0.17249052820491348 +1,BCUT2D_CHGHI,0.0025441550751529565,0.10265427840538412 +2,BCUT2D_CHGLO,0.0020798469932270898,0.08391988144452972 +3,MaxEStateIndex,0.0016625275886545318,0.06708143367876807 +4,BCUT2D_LOGPHI,0.0015631652454748935,0.06307225603885526 +5,BCUT2D_MRHI,0.0007884916679384737,0.03181490153308185 +6,BCUT2D_LOGPLOW,0.0007855184383449213,0.03169493449905924 +7,MaxAbsEStateIndex,0.0006295833643009225,0.02540310006631954 +8,SPS,0.0005112410365806192,0.02062809779716311 +9,FractionCSP3,0.00040155119353596866,0.016202215194283064 +10,MinAbsPartialCharge,0.0002863410206763058,0.011553592445075363 +11,MaxAbsPartialCharge,0.00026874399311872506,0.010843568836984372 +12,Kappa1,0.0002333491580784406,0.009415420338560172 +13,Kappa2,0.00022015026469413617,0.008882857331946312 +14,MinPartialCharge,0.0002171091250103434,0.0087601501892809 +15,LabuteASA,0.0002096070323495739,0.008457447765146899 +16,NumHAcceptors,0.0002094262248144212,0.008450152350165075 +17,SlogP_VSA2,0.0002019567672807627,0.008148766722894978 +18,MolWt,0.00020139323708051092,0.008126028806235943 +19,Chi2n,0.00018949559405871902,0.007645970035033298 +20,MaxPartialCharge,0.00018569617616925253,0.007492667075786507 +21,VSA_EState7,0.0001852950709920548,0.00747648285693443 +22,HeavyAtomCount,0.0001751358313575405,0.007066567036938377 +23,Chi0v,0.00016912558129982704,0.006824059066910883 +24,Chi1,0.00016386499414971562,0.0066117992942431895 +25,Chi0,0.0001629008957346547,0.006572898824662378 +26,NumValenceElectrons,0.00016003780139648582,0.006457375645214051 +27,VSA_EState8,0.0001572973319431325,0.006346800265209328 +28,ExactMolWt,0.00013294162014192035,0.005364070067502093 +29,Chi4v,0.000132752138436079,0.005356424657843645 +30,Ipc,0.0001289364896293357,0.005202466796261229 +31,fr_unbrch_alkane,0.00012554697726350862,0.005065703141616794 +32,FpDensityMorgan2,0.00012445410339024675,0.005021606702707287 +33,SMR_VSA5,0.00012394980066828803,0.005001258559417542 +34,Chi3v,0.00012388455127862043,0.004998625807554995 +35,FpDensityMorgan1,0.00012237864033776152,0.004937863709172549 +36,BalabanJ,0.00012201472962364277,0.00492318025212754 +37,NOCount,0.00012081415032762674,0.00487473800011837 +38,Chi1n,0.00012028352590378669,0.004853327800769457 +39,Chi0n,0.00011783310615220035,0.0047544556550240666 +40,PEOE_VSA6,0.00011661093322372556,0.004705142120136013 +41,PEOE_VSA7,0.00011179551317003892,0.0045108444235617195 +42,HeavyAtomMolWt,0.00011157699627133475,0.004502027471020212 +43,Chi1v,0.00011101951083604183,0.004479533454975194 +44,SMR_VSA6,0.00010745485439588762,0.004335702900697705 +45,BertzCT,0.00010556817468388211,0.004259577138432069 +46,fr_ether,0.0001033690154721212,0.004170843120530941 +47,qed,9.835270047285092e-05,0.0039684395007463895 +48,NumHeteroatoms,9.700830653551555e-05,0.003914194462431876 +49,SMR_VSA10,9.562928188369874e-05,0.003858552107169008 +50,FpDensityMorgan3,9.468705242543666e-05,0.003820534029546764 +51,PEOE_VSA14,9.468658548906445e-05,0.003820515189100748 +52,MolLogP,9.392155724186983e-05,0.0037896470146555335 +53,fr_NH0,9.324447873955489e-05,0.0037623275301800208 +54,SMR_VSA1,9.309181489227815e-05,0.0037561676866886243 +55,EState_VSA3,9.189342412768532e-05,0.003707813739876024 +56,SlogP_VSA5,9.1655764659468e-05,0.003698224402553656 +57,PEOE_VSA1,8.943917103226286e-05,0.003608786922291162 +58,Kappa3,8.90284897568495e-05,0.0035922163168300826 +59,VSA_EState2,8.755742312446487e-05,0.0035328601537138846 +60,fr_C_O,8.584381582300745e-05,0.0034637177013848996 +61,Chi3n,8.397378978706011e-05,0.0033882638993764217 +62,MolMR,8.375246949195933e-05,0.003379333832411588 +63,TPSA,8.336425575538366e-05,0.0033636697711346484 +64,Chi2v,8.214614356895579e-05,0.0033145200833912467 +65,fr_amide,7.657412992030116e-05,0.0030896945427023783 +66,NumRotatableBonds,7.560946987586792e-05,0.0030507714119015383 +67,PEOE_VSA2,7.507415870429357e-05,0.003029172106663825 +68,EState_VSA4,7.383200001448069e-05,0.002979052164993189 +69,SlogP_VSA3,7.14415959905226e-05,0.002882601597740722 +70,fr_ester,6.907781974778548e-05,0.0027872254365625833 +71,SMR_VSA3,6.524772491504612e-05,0.0026326846913387837 +72,PEOE_VSA9,6.303492955800473e-05,0.002543400467725877 +73,EState_VSA9,6.247027607211206e-05,0.0025206172275415452 +74,BCUT2D_MWHI,5.8863394212620035e-05,0.00237508291707607 +75,Chi4n,5.709862351282099e-05,0.002303876069463629 +76,EState_VSA10,5.6465740454024615e-05,0.0022783398298097516 +77,EState_VSA5,5.533709770673559e-05,0.0022328001502784404 +78,MinAbsEStateIndex,5.529066648170658e-05,0.002230926693763395 +79,fr_C_O_noCOO,5.490865553590302e-05,0.002215512909294276 +80,AvgIpc,5.423432968042069e-05,0.0021883044915446347 +81,PEOE_VSA8,5.168654235639861e-05,0.0020855036552936655 +82,fr_Al_OH,4.881435880686408e-05,0.001969613734665476 +83,VSA_EState1,4.508355247293737e-05,0.0018190791875304284 +84,SlogP_VSA1,4.476272900767231e-05,0.00180613426068853 +85,NHOHCount,4.410191929340791e-05,0.001779471206598951 +86,PEOE_VSA10,4.232180599696179e-05,0.0017076452995576832 +87,EState_VSA2,4.223716201602196e-05,0.0017042299940719454 +88,BCUT2D_MRLOW,4.1591166607492344e-05,0.0016781646833668533 +89,VSA_EState5,4.13168567659759e-05,0.0016670965377512906 +90,fr_NH1,4.108568006064745e-05,0.0016577687738498705 +91,HallKierAlpha,3.913974863849873e-05,0.0015792522604824494 +92,PEOE_VSA12,3.9064657318760746e-05,0.0015762223959441223 +93,EState_VSA8,3.310350435358201e-05,0.0013356954476928453 +94,VSA_EState3,2.9067097238128717e-05,0.001172830194770987 +95,NumHDonors,2.824257718502311e-05,0.0011395615815842548 +96,fr_Al_OH_noTert,2.4799681625123438e-05,0.0010006439649742087 +97,MinEStateIndex,2.265856945839269e-05,0.00091425209106396 +98,EState_VSA1,2.2280820633807466e-05,0.0008990102792007633 +99,NumAliphaticRings,2.077566962049401e-05,0.0008382788431842162 +100,fr_NH2,2.0166168685872958e-05,0.0008136860503776811 +101,RingCount,1.9158450699691904e-05,0.0007730255718880499 +102,VSA_EState4,1.767411638224925e-05,0.0007131340700855258 +103,fr_oxazole,1.5829170183082257e-05,0.0006386922160405803 +104,VSA_EState6,1.3640112875240431e-05,0.0005503658005169412 +105,fr_piperzine,1.150409444222002e-05,0.0004641794539990793 +106,EState_VSA7,1.0783636949412495e-05,0.00043510966781811617 +107,EState_VSA6,1.0761669799989224e-05,0.0004342233138789666 +108,PEOE_VSA13,1.046459681802059e-05,0.0004222366968305179 +109,NumAromaticHeterocycles,9.727336628119728e-06,0.0003924889375329682 +110,PEOE_VSA11,9.693725018460673e-06,0.0003911327405112964 +111,NumAliphaticHeterocycles,8.086577746483967e-06,0.00032628585082787896 +112,fr_Nhpyrrole,7.824414754604084e-06,0.00031570781923740765 +113,fr_alkyl_halide,7.293651044422415e-06,0.0002942919998148483 +114,SlogP_VSA10,6.983986643401606e-06,0.0002817973307810766 +115,NumAromaticCarbocycles,6.956749390023057e-06,0.00028069833307506944 +116,NumAromaticRings,6.810578889327548e-06,0.000274800490046712 +117,SlogP_VSA6,6.797688746323831e-06,0.00027428038482926205 +118,fr_benzene,6.797410711237028e-06,0.00027426916637348243 +119,VSA_EState9,6.203514651947829e-06,0.0002503060156954646 +120,SMR_VSA4,6.105577394797581e-06,0.0002463543389443307 +121,NumSaturatedHeterocycles,5.714045322990511e-06,0.00023055638594356745 +122,SlogP_VSA12,5.664886899634552e-06,0.0002285728895260588 +123,fr_SH,5.377765047543915e-06,0.00021698779126352077 +124,PEOE_VSA5,5.110606460208825e-06,0.00020620819206749433 +125,SMR_VSA7,5.0601258979403636e-06,0.0002041713485811139 +126,fr_Ar_N,5.024790050774354e-06,0.0002027455801882609 +127,NumSaturatedRings,4.9410873264513694e-06,0.0001993682535269039 +128,fr_allylic_oxid,4.86871674044984e-06,0.00019644816805085836 +129,fr_morpholine,4.5792701930169865e-06,0.00018476927050493615 +130,fr_Ndealkylation2,4.271502728826535e-06,0.00017235114109856486 +131,SlogP_VSA11,3.7558212421582523e-06,0.000151543875292346 +132,fr_phenol,3.7183107125833215e-06,0.00015003036049769975 +133,fr_halogen,3.714878389921859e-06,0.0001498918694876556 +134,VSA_EState10,3.577223335298527e-06,0.0001443376167460603 +135,fr_Ar_NH,3.5437008648380982e-06,0.00014298501640770466 +136,SlogP_VSA4,3.538198327639121e-06,0.00014276299417679728 +137,SMR_VSA9,3.4952400562338023e-06,0.00014102966809313058 +138,PEOE_VSA4,3.4174223869382476e-06,0.00013788979789939102 +139,fr_Ar_OH,3.237205297736433e-06,0.00013061820685959612 +140,fr_aniline,2.9834844629041853e-06,0.00012038080841227459 +141,fr_methoxy,2.922225088768015e-06,0.00011790904994561118 +142,fr_phenol_noOrthoHbond,2.8849666796429165e-06,0.00011640570797537392 +143,fr_priamide,2.873873692441327e-06,0.00011595811631413414 +144,PEOE_VSA3,2.8119690776239633e-06,0.00011346032298930795 +145,fr_imidazole,2.5060754958390723e-06,0.00010111780298585319 +146,fr_para_hydroxylation,2.3805779507877144e-06,9.605409438777577e-05 +147,SlogP_VSA8,1.8501844925110401e-06,7.465321428338401e-05 +148,fr_quatN,1.441099207251688e-06,5.814700553271076e-05 +149,fr_Ndealkylation1,9.988727066839924e-07,4.030357973258114e-05 +150,fr_thiazole,8.230729198568842e-07,3.3210222713268084e-05 +151,fr_bicyclic,2.5131750177480575e-07,1.014042620565709e-05 +152,fr_nitrile,0.0,0.0 +153,fr_ketone_Topliss,0.0,0.0 +154,fr_nitro_arom,0.0,0.0 +155,fr_nitro,0.0,0.0 +156,fr_lactone,0.0,0.0 +157,fr_lactam,0.0,0.0 +158,fr_nitro_arom_nonortho,0.0,0.0 +159,fr_phos_ester,0.0,0.0 +160,fr_nitroso,0.0,0.0 +161,fr_oxime,0.0,0.0 +162,fr_phos_acid,0.0,0.0 +163,fr_term_acetylene,0.0,0.0 +164,fr_thiophene,0.0,0.0 +165,fr_piperdine,0.0,0.0 +166,fr_prisulfonamd,0.0,0.0 +167,fr_pyridine,0.0,0.0 +168,fr_sulfide,0.0,0.0 +169,fr_sulfonamd,0.0,0.0 +170,fr_thiocyan,0.0,0.0 +171,fr_sulfone,0.0,0.0 +172,fr_tetrazole,0.0,0.0 +173,fr_isothiocyan,0.0,0.0 +174,fr_ketone,0.0,0.0 +175,NumRadicalElectrons,0.0,0.0 +176,fr_isocyan,0.0,0.0 +177,fr_Al_COO,0.0,0.0 +178,fr_HOCCN,0.0,0.0 +179,fr_C_S,0.0,0.0 +180,fr_COO2,0.0,0.0 +181,fr_COO,0.0,0.0 +182,fr_Ar_COO,0.0,0.0 +183,fr_ArN,0.0,0.0 +184,NumSaturatedCarbocycles,0.0,0.0 +185,fr_imide,0.0,0.0 +186,NumAliphaticCarbocycles,0.0,0.0 +187,EState_VSA11,0.0,0.0 +188,SlogP_VSA9,0.0,0.0 +189,SlogP_VSA7,0.0,0.0 +190,SMR_VSA8,0.0,0.0 +191,SMR_VSA2,0.0,0.0 +192,fr_Imine,0.0,0.0 +193,fr_N_O,0.0,0.0 +194,fr_aldehyde,0.0,0.0 +195,fr_alkyl_carbamate,0.0,0.0 +196,fr_amidine,0.0,0.0 +197,fr_aryl_methyl,0.0,0.0 +198,fr_azide,0.0,0.0 +199,fr_azo,0.0,0.0 +200,fr_barbitur,0.0,0.0 +201,fr_benzodiazepine,0.0,0.0 +202,fr_diazo,0.0,0.0 +203,fr_dihydropyridine,0.0,0.0 +204,fr_epoxide,0.0,0.0 +205,fr_furan,0.0,0.0 +206,fr_guanido,0.0,0.0 +207,fr_hdrzine,0.0,0.0 +208,fr_hdrzone,0.0,0.0 +209,fr_urea,0.0,0.0 diff --git a/reports/feature_importance/desc/desc_importance_biodist_lymph_nodes.png b/reports/feature_importance/desc/desc_importance_biodist_lymph_nodes.png new file mode 100644 index 0000000..1a4fe99 Binary files /dev/null and b/reports/feature_importance/desc/desc_importance_biodist_lymph_nodes.png differ diff --git a/reports/feature_importance/desc/desc_importance_biodist_muscle.csv b/reports/feature_importance/desc/desc_importance_biodist_muscle.csv new file mode 100644 index 0000000..ca8c99b --- /dev/null +++ b/reports/feature_importance/desc/desc_importance_biodist_muscle.csv @@ -0,0 +1,211 @@ +rank,feature,ig_raw,ig_normalized +0,BCUT2D_MWLOW,0.055860074222614235,0.2153460662835151 +1,BCUT2D_CHGHI,0.029253484142188185,0.11277505128264032 +2,BCUT2D_CHGLO,0.01859970401564016,0.07170368370517281 +3,BCUT2D_LOGPHI,0.017941142253156916,0.0691648635025657 +4,MaxEStateIndex,0.012763870030094283,0.04920597116611242 +5,BCUT2D_MRHI,0.007806300581526556,0.030094054579288515 +6,SPS,0.006514097450185297,0.025112484736316016 +7,BCUT2D_LOGPLOW,0.006014996399399831,0.023188401221204837 +8,MaxAbsEStateIndex,0.005167918917371017,0.0199228344254056 +9,MaxAbsPartialCharge,0.004484532766403036,0.017288313769791976 +10,Kappa1,0.003239865473687384,0.012489999248226987 +11,MinAbsPartialCharge,0.0032006402955656065,0.0123387823383795 +12,FractionCSP3,0.0026540908380406586,0.010231780560359333 +13,LabuteASA,0.0025719519342929766,0.009915127028170816 +14,NumHAcceptors,0.0024955757395824594,0.009620689304671422 +15,MaxPartialCharge,0.002381605969594081,0.009181324660356316 +16,SMR_VSA5,0.002259749318522252,0.008711555315721651 +17,NumValenceElectrons,0.0022451779451333966,0.008655381241782308 +18,VSA_EState8,0.0021889241080813985,0.008438517181161396 +19,Chi0v,0.0019583323996699075,0.00754956352302417 +20,HeavyAtomCount,0.0018381955228676651,0.0070864240769173 +21,ExactMolWt,0.001784243945593288,0.006878435453602426 +22,Kappa2,0.001624929629051028,0.00626426313379313 +23,Chi3v,0.0015821374187563475,0.006099294964975235 +24,Chi4v,0.0015715223495664705,0.006058372831856372 +25,Chi2n,0.001531598105453537,0.005904460954031076 +26,BalabanJ,0.0015023589899334556,0.0057917412951962914 +27,MolWt,0.001489260729838769,0.005741246217525821 +28,PEOE_VSA6,0.001480374513909627,0.005706988983336328 +29,FpDensityMorgan2,0.0014133255007711275,0.00544850845984067 +30,Chi1,0.0013655933314156842,0.005264496264208607 +31,fr_unbrch_alkane,0.0013654506641117459,0.005263946267755542 +32,VSA_EState7,0.0013234071257789742,0.005101864302799305 +33,qed,0.0013100329707048917,0.005050305622916574 +34,Chi0n,0.0013013309994205145,0.005016758669907978 +35,SlogP_VSA2,0.0012723504055696282,0.004905035637470221 +36,MinPartialCharge,0.0012691469355852459,0.004892685946403847 +37,Ipc,0.0012614803832111097,0.004863130634874053 +38,BertzCT,0.0011946336643354248,0.004605429975607635 +39,NumHeteroatoms,0.001122319974969979,0.004326653608765779 +40,PEOE_VSA7,0.0011005012409165092,0.004242540248461672 +41,SlogP_VSA5,0.0010805195321738946,0.0041655088009523705 +42,HeavyAtomMolWt,0.0010269986381091906,0.003959180503662984 +43,PEOE_VSA1,0.0010027496101892755,0.003865698122079631 +44,FpDensityMorgan3,0.0009842802527853405,0.0037944969373502426 +45,SlogP_VSA3,0.0009467491319553117,0.003649810784558021 +46,EState_VSA4,0.0009439852850597608,0.003639155883628336 +47,Chi1v,0.0009351881548167355,0.0036052420834985772 +48,EState_VSA3,0.000930911358213762,0.003588754613018773 +49,Chi0,0.0009223339049547157,0.003555687689213674 +50,VSA_EState2,0.0009050123718958835,0.003488911479942021 +51,Chi2v,0.0009043983322488103,0.003486544296862164 +52,fr_C_O,0.0009026882644967739,0.0034799518179117824 +53,MolLogP,0.0008767760665302829,0.0033800577526335 +54,fr_ether,0.0008258794837958463,0.0031838464329804867 +55,Chi1n,0.000815593566248835,0.00314419321173631 +56,Kappa3,0.0007922287369645855,0.0030541195026377982 +57,NOCount,0.0007874399549114661,0.0030356582780193234 +58,fr_NH0,0.000782849231910108,0.0030179605904759816 +59,PEOE_VSA2,0.0007828032957231842,0.0030177835019688185 +60,SMR_VSA6,0.000774034342557547,0.0029839783272367043 +61,TPSA,0.0007675832952302228,0.0029591089069095533 +62,NumRotatableBonds,0.0007644270054798883,0.002946941100274052 +63,EState_VSA5,0.0007267341204413339,0.002801631330588079 +64,FpDensityMorgan1,0.0006369229333553548,0.0024554003934406893 +65,fr_ester,0.0006294323940520732,0.002426523629566811 +66,BCUT2D_MRLOW,0.0006224741776249677,0.0023996990226041537 +67,EState_VSA10,0.0006152989064350307,0.0023720376482366124 +68,PEOE_VSA14,0.0006150990461506052,0.002371267167232849 +69,fr_amide,0.0006103696358211747,0.002353034826433861 +70,fr_Al_OH,0.0005886718767697468,0.0022693878366308842 +71,AvgIpc,0.0005885222061773434,0.002268810841813728 +72,Chi4n,0.0005730912670918658,0.002209323057786172 +73,fr_NH1,0.0005725054795856735,0.0022070647894810885 +74,SMR_VSA3,0.0005656184332506247,0.0021805145502054547 +75,Chi3n,0.0005565994461723447,0.002145745470210513 +76,PEOE_VSA9,0.0005458088543146801,0.00210414667998767 +77,MolMR,0.000524223099094058,0.002020931475940591 +78,fr_C_O_noCOO,0.0005093714440280294,0.0019636768886386975 +79,SMR_VSA1,0.0005071539552208132,0.0019551282517400303 +80,PEOE_VSA8,0.0004701330810033698,0.0018124091496970944 +81,MinAbsEStateIndex,0.0004667950054323386,0.0017995405408887007 +82,SMR_VSA10,0.0004606388575080705,0.0017758079866914513 +83,PEOE_VSA12,0.0004533068369803656,0.0017475423282489134 +84,BCUT2D_MWHI,0.0004456196702146953,0.0017179075462173796 +85,EState_VSA9,0.00044031233016206327,0.0016974472296376255 +86,EState_VSA8,0.0003962585247075787,0.00152761548770983 +87,EState_VSA1,0.00037777045972703035,0.0014563421834372892 +88,VSA_EState1,0.0003742505930878075,0.001442772752226395 +89,EState_VSA2,0.000372508103239753,0.0014360552829150923 +90,PEOE_VSA10,0.00034599693261254643,0.0013338521192675088 +91,RingCount,0.0003394170587237052,0.0013084860599071734 +92,NHOHCount,0.000317415246387096,0.0012236669148606603 +93,HallKierAlpha,0.0003157612808449417,0.0012172907154332844 +94,SlogP_VSA1,0.00030804799246028034,0.0011875552319978774 +95,VSA_EState5,0.0002927946777494791,0.0011287522073605465 +96,MinEStateIndex,0.00029144295471142536,0.0011235411824379938 +97,VSA_EState3,0.00029047072548130776,0.0011197931433752938 +98,fr_oxazole,0.0002775119168875857,0.001069835664921939 +99,NumHDonors,0.0002360801930046528,0.0009101123046919172 +100,fr_Al_OH_noTert,0.00018026190117855694,0.0006949273136460446 +101,VSA_EState6,0.00015302591461578834,0.0005899299134587663 +102,NumAromaticRings,0.0001444451378069964,0.0005568501770433414 +103,EState_VSA6,0.00014000464146100397,0.0005397316279944274 +104,NumAromaticCarbocycles,0.00013343421672382832,0.0005144019960407723 +105,NumSaturatedHeterocycles,0.00012545042073160284,0.0004836236792400434 +106,EState_VSA7,0.00011966267527383137,0.0004613113526932807 +107,NumAliphaticHeterocycles,0.0001160685076420154,0.0004474554838666374 +108,SlogP_VSA10,0.00010760933083386683,0.00041484452738326655 +109,NumAromaticHeterocycles,0.00010461403749990701,0.00040329737772745115 +110,fr_Ndealkylation2,9.994708572255534e-05,0.0003853058207742261 +111,fr_piperzine,9.992521137571904e-05,0.0003852214930211883 +112,NumSaturatedRings,9.873880619323109e-05,0.0003806477846503583 +113,PEOE_VSA11,8.796036730343396e-05,0.00033909584531091136 +114,fr_Ar_N,8.213588673631465e-05,0.00031664190131371306 +115,PEOE_VSA13,7.697640434336687e-05,0.0002967515905176247 +116,fr_NH2,7.674228387254847e-05,0.00029584903313421943 +117,fr_alkyl_halide,7.168645717169121e-05,0.0002763583252002868 +118,PEOE_VSA3,6.850631866346282e-05,0.00026409857926342113 +119,SlogP_VSA11,6.706141327509098e-05,0.00025852832723873324 +120,VSA_EState4,6.601410891901791e-05,0.0002544908662002261 +121,VSA_EState9,6.575099358610552e-05,0.0002534765307789061 +122,VSA_EState10,6.46341075420982e-05,0.0002491708255070974 +123,fr_benzene,6.267958262023342e-05,0.00024163594018454483 +124,fr_SH,6.250137200428052e-05,0.00024094892077668512 +125,SlogP_VSA6,6.0207735427564416e-05,0.00023210672675611505 +126,fr_phenol_noOrthoHbond,5.7349026461776844e-05,0.00022108612323921983 +127,fr_morpholine,5.105493833056112e-05,0.00019682179601155663 +128,SMR_VSA7,5.013316042616368e-05,0.0001932682517590377 +129,fr_Nhpyrrole,4.899519110314895e-05,0.00018888126837827935 +130,SlogP_VSA12,4.7431941934003195e-05,0.00018285478946858562 +131,PEOE_VSA5,4.305599920458889e-05,0.00016598510094461611 +132,SlogP_VSA8,4.227333045359369e-05,0.0001629678361257732 +133,SMR_VSA9,4.155154447416404e-05,0.00016018528036422278 +134,NumAliphaticRings,4.112712609791596e-05,0.0001585491058862007 +135,fr_Ar_NH,3.9839404811773155e-05,0.00015358481399615988 +136,fr_Ndealkylation1,3.8910498053343866e-05,0.00015000378731197162 +137,fr_imidazole,3.7875569686905254e-05,0.00014601403692765232 +138,fr_aniline,3.436188323095741e-05,0.0001324684309295903 +139,SMR_VSA4,3.390126541196288e-05,0.00013069270404843252 +140,PEOE_VSA4,2.8754891087291428e-05,0.00011085292614151708 +141,fr_halogen,2.689001073776963e-05,0.00010366362943993186 +142,fr_priamide,2.6055459168281102e-05,0.00010044635126583069 +143,fr_phenol,2.6053092095844205e-05,0.0001004372259693647 +144,fr_methoxy,2.5389901115310774e-05,9.788055967702446e-05 +145,fr_allylic_oxid,1.925739704965389e-05,7.423915487430329e-05 +146,fr_Ar_OH,1.860304453415091e-05,7.17165617317535e-05 +147,fr_para_hydroxylation,1.771253633040146e-05,6.828356524294809e-05 +148,SlogP_VSA4,1.4481738896282615e-05,5.58285241768783e-05 +149,fr_thiazole,1.4016775087191289e-05,5.403604307753381e-05 +150,fr_quatN,7.077695134648536e-06,2.728520910170044e-05 +151,fr_bicyclic,6.4838376042641516e-06,2.4995830061646324e-05 +152,fr_nitrile,0.0,0.0 +153,fr_nitro,0.0,0.0 +154,SMR_VSA8,0.0,0.0 +155,fr_nitro_arom,0.0,0.0 +156,fr_nitro_arom_nonortho,0.0,0.0 +157,fr_lactone,0.0,0.0 +158,fr_nitroso,0.0,0.0 +159,NumRadicalElectrons,0.0,0.0 +160,fr_oxime,0.0,0.0 +161,fr_phos_acid,0.0,0.0 +162,SMR_VSA2,0.0,0.0 +163,fr_phos_ester,0.0,0.0 +164,fr_piperdine,0.0,0.0 +165,fr_ketone_Topliss,0.0,0.0 +166,fr_thiophene,0.0,0.0 +167,fr_prisulfonamd,0.0,0.0 +168,fr_pyridine,0.0,0.0 +169,fr_sulfide,0.0,0.0 +170,fr_sulfonamd,0.0,0.0 +171,fr_sulfone,0.0,0.0 +172,fr_term_acetylene,0.0,0.0 +173,fr_thiocyan,0.0,0.0 +174,fr_tetrazole,0.0,0.0 +175,fr_lactam,0.0,0.0 +176,fr_COO2,0.0,0.0 +177,fr_ketone,0.0,0.0 +178,fr_azide,0.0,0.0 +179,fr_C_S,0.0,0.0 +180,fr_HOCCN,0.0,0.0 +181,fr_Imine,0.0,0.0 +182,fr_Ar_COO,0.0,0.0 +183,fr_N_O,0.0,0.0 +184,fr_ArN,0.0,0.0 +185,fr_Al_COO,0.0,0.0 +186,NumSaturatedCarbocycles,0.0,0.0 +187,fr_aldehyde,0.0,0.0 +188,fr_alkyl_carbamate,0.0,0.0 +189,NumAliphaticCarbocycles,0.0,0.0 +190,EState_VSA11,0.0,0.0 +191,fr_amidine,0.0,0.0 +192,fr_aryl_methyl,0.0,0.0 +193,fr_azo,0.0,0.0 +194,fr_isothiocyan,0.0,0.0 +195,fr_barbitur,0.0,0.0 +196,SlogP_VSA9,0.0,0.0 +197,fr_benzodiazepine,0.0,0.0 +198,fr_diazo,0.0,0.0 +199,fr_dihydropyridine,0.0,0.0 +200,fr_epoxide,0.0,0.0 +201,SlogP_VSA7,0.0,0.0 +202,fr_furan,0.0,0.0 +203,fr_guanido,0.0,0.0 +204,fr_hdrzine,0.0,0.0 +205,fr_hdrzone,0.0,0.0 +206,fr_COO,0.0,0.0 +207,fr_imide,0.0,0.0 +208,fr_isocyan,0.0,0.0 +209,fr_urea,0.0,0.0 diff --git a/reports/feature_importance/desc/desc_importance_biodist_muscle.png b/reports/feature_importance/desc/desc_importance_biodist_muscle.png new file mode 100644 index 0000000..5806928 Binary files /dev/null and b/reports/feature_importance/desc/desc_importance_biodist_muscle.png differ diff --git a/reports/feature_importance/desc/desc_importance_biodist_spleen.csv b/reports/feature_importance/desc/desc_importance_biodist_spleen.csv new file mode 100644 index 0000000..3e27019 --- /dev/null +++ b/reports/feature_importance/desc/desc_importance_biodist_spleen.csv @@ -0,0 +1,211 @@ +rank,feature,ig_raw,ig_normalized +0,BCUT2D_MWLOW,0.028850912336811328,0.19788461328829826 +1,BCUT2D_CHGHI,0.01376712727847449,0.09442691537056618 +2,BCUT2D_CHGLO,0.009739153311617938,0.06679957168515549 +3,BCUT2D_LOGPHI,0.007028112829660736,0.04820490157149922 +4,MaxEStateIndex,0.0058721372430278005,0.04027621705500624 +5,BCUT2D_MRHI,0.004428830429688942,0.030376765443920994 +6,SPS,0.00418024994667516,0.02867178460387185 +7,BCUT2D_LOGPLOW,0.003457416035375256,0.02371396187232705 +8,FractionCSP3,0.003148724527382077,0.021596687417658515 +9,MaxAbsEStateIndex,0.0028880808064174522,0.019808966415043996 +10,MinAbsPartialCharge,0.0024498407064425954,0.016803135206014914 +11,MaxAbsPartialCharge,0.002331376342598946,0.015990603714671592 +12,MaxPartialCharge,0.001483164847547726,0.01017283262565337 +13,NumHAcceptors,0.001434919106723418,0.009841921434548874 +14,MolWt,0.0014181275552225358,0.009726750391204674 +15,Kappa1,0.0011822582381256022,0.008108953766425205 +16,NOCount,0.0011788280279684648,0.008085426406093634 +17,NumValenceElectrons,0.0011549742305745508,0.007921816346985404 +18,HeavyAtomMolWt,0.0011139125358740232,0.007640179583408653 +19,Chi0n,0.0009931028635253728,0.00681156192948108 +20,VSA_EState8,0.0009599171154896353,0.006583945248245113 +21,Chi1,0.0009521653853760119,0.006530777150892002 +22,SMR_VSA5,0.0009432462339769063,0.006469601864479314 +23,SlogP_VSA2,0.00094243254414917,0.00646402087296617 +24,Chi0,0.0009415915818470236,0.006458252823137882 +25,LabuteASA,0.000931187636887416,0.0063868935329715925 +26,ExactMolWt,0.0009194905658025197,0.006306664860782009 +27,HeavyAtomCount,0.0009103131703312937,0.006243718312242531 +28,fr_unbrch_alkane,0.0009079306528590691,0.006227376938245765 +29,PEOE_VSA6,0.0008995217205183217,0.0061697011772510885 +30,Chi2v,0.0008850536755740776,0.006070466759794677 +31,Chi4v,0.000875437646855532,0.006004511683500238 +32,MolLogP,0.0008285925112087094,0.005683207059102565 +33,NumHeteroatoms,0.0007976508212405557,0.005470982077016912 +34,Chi0v,0.0007963098674020957,0.005461784650999208 +35,Chi1v,0.000776625539431515,0.005326772434303085 +36,Kappa2,0.0007749646456608484,0.005315380582368408 +37,VSA_EState2,0.0007688684705302479,0.005273567718907677 +38,SMR_VSA6,0.0007605723123245005,0.005216665460873076 +39,FpDensityMorgan3,0.0007573316220561144,0.005194437992533071 +40,Kappa3,0.0007549901653658462,0.005178378249039489 +41,fr_C_O,0.0007545086369243214,0.005175075508788989 +42,BalabanJ,0.0007372916042702892,0.005056986146173993 +43,PEOE_VSA2,0.0007264310789046499,0.004982495231051521 +44,FpDensityMorgan2,0.0007211997589133948,0.0049466142952470735 +45,Ipc,0.0007107440402295525,0.004874899895915464 +46,fr_NH0,0.000689399713774252,0.004728501967933226 +47,SlogP_VSA5,0.0006810130651361206,0.004670979047924074 +48,MinPartialCharge,0.0006517478050583335,0.004470252477974833 +49,qed,0.0006504782878691504,0.004461545026539275 +50,TPSA,0.0006498049209416426,0.004456926491343735 +51,PEOE_VSA14,0.0006258925282536846,0.0042929145348200234 +52,FpDensityMorgan1,0.0006133405333787238,0.004206822052792717 +53,Chi2n,0.0006044528905240338,0.004145862879342217 +54,PEOE_VSA1,0.0005987297162099192,0.00410660837942524 +55,Chi3v,0.0005950242963133098,0.0040811933916858134 +56,Chi1n,0.0005842317484973459,0.004007168725636257 +57,fr_amide,0.0005722647847355444,0.003925088895071672 +58,Chi4n,0.0005567516088964972,0.003818686062261897 +59,NumRotatableBonds,0.0005540875295072944,0.0038004134921071886 +60,EState_VSA3,0.0005180732441216634,0.0035533962451934067 +61,BertzCT,0.0005118623741150041,0.003510796704662291 +62,SMR_VSA1,0.0005031860483004235,0.0034512869269977932 +63,EState_VSA4,0.00048788661290362606,0.0033463501117705773 +64,MolMR,0.00047844846389403644,0.0032816150890053876 +65,fr_ether,0.0004783296838039536,0.0032808003919892934 +66,VSA_EState5,0.00046768440746429413,0.0032077858416267794 +67,SlogP_VSA3,0.0004565692315510651,0.0031315483118896583 +68,SMR_VSA10,0.00045547519295684326,0.00312404444505825 +69,fr_ester,0.00042752815459743765,0.002932359384504791 +70,BCUT2D_MWHI,0.00042601522961838563,0.002921982431051159 +71,VSA_EState7,0.0004216165421667246,0.0028918124123291705 +72,fr_C_O_noCOO,0.0004054669834515071,0.00278104471306789 +73,AvgIpc,0.00040451192555533054,0.0027744940965658362 +74,PEOE_VSA7,0.00039050539236374705,0.0026784251275235715 +75,PEOE_VSA9,0.00038802904724814097,0.0026614402020609254 +76,EState_VSA5,0.00037210838321190064,0.0025522424612988556 +77,SMR_VSA3,0.0003528765197406209,0.0024203336390959678 +78,EState_VSA10,0.00034668347786393324,0.0023778563793635976 +79,HallKierAlpha,0.0003457341835106535,0.00237134529424426 +80,Chi3n,0.00034561987288746156,0.002370561252713753 +81,BCUT2D_MRLOW,0.00033806599078262025,0.0023187501688321983 +82,fr_NH1,0.00033470996424036993,0.002295731624158617 +83,EState_VSA2,0.00032655715467714674,0.002239812575610086 +84,EState_VSA9,0.00032598909204415095,0.0022359163087211364 +85,PEOE_VSA8,0.00032169269991563684,0.0022064479201668813 +86,VSA_EState1,0.000292340568079747,0.0020051255082540608 +87,PEOE_VSA12,0.0002853833851708794,0.001957407105680682 +88,MinAbsEStateIndex,0.00027230333229543623,0.0018676927432774847 +89,EState_VSA8,0.0002708356659825337,0.0018576262130626181 +90,VSA_EState6,0.00025311517621448787,0.001736083704316548 +91,fr_Al_OH,0.0002428273789407516,0.001665521055852076 +92,NumHDonors,0.00023344885621518816,0.00160119500192706 +93,fr_NH2,0.00023110290687397979,0.001585104443931546 +94,MinEStateIndex,0.00021107152616258443,0.001447711838130046 +95,SlogP_VSA1,0.00020605719460679584,0.0014133191974664613 +96,VSA_EState3,0.00020086086345560332,0.0013776782455143222 +97,NumAliphaticRings,0.00019699084184433358,0.0013511342762622181 +98,EState_VSA6,0.00018509051217412609,0.0012695114801682579 +99,PEOE_VSA11,0.0001842669471462738,0.00126386275595629 +100,EState_VSA1,0.00018266764204982017,0.001252893332637987 +101,NHOHCount,0.00017430889311353617,0.0011955617730144779 +102,PEOE_VSA10,0.00017191223774450806,0.0011791234290429308 +103,RingCount,0.00016483232231048444,0.0011305632202211096 +104,fr_phenol,0.00013459373777783057,0.0009231607458461496 +105,fr_phenol_noOrthoHbond,0.00012948361377200352,0.0008881110773662343 +106,fr_Al_OH_noTert,0.00011691840230830372,0.0008019279444949936 +107,NumSaturatedHeterocycles,0.00011621426327041871,0.0007970983474414791 +108,NumAromaticCarbocycles,0.0001143494518369946,0.0007843078510769017 +109,SMR_VSA4,0.00010889668784811084,0.0007469080600167109 +110,SlogP_VSA11,0.00010724244142116396,0.0007355618013383584 +111,NumAromaticRings,9.83015892295103e-05,0.0006742376720436395 +112,fr_oxazole,9.107779016548108e-05,0.000624690584326975 +113,fr_benzene,8.669558189239712e-05,0.0005946335941235296 +114,VSA_EState4,8.632879437812956e-05,0.000592117846802527 +115,NumAliphaticHeterocycles,7.749977681511318e-05,0.0005315607765173041 +116,fr_Ar_N,7.529914244548992e-05,0.0005164669148002792 +117,NumAromaticHeterocycles,7.498979634411494e-05,0.000514345150575696 +118,EState_VSA7,7.410446225016036e-05,0.0005082727604631096 +119,VSA_EState10,7.309448171504126e-05,0.000501345436803894 +120,PEOE_VSA3,6.862309437899313e-05,0.0004706767791362775 +121,fr_thiazole,6.082155984481315e-05,0.0004171670798127748 +122,SMR_VSA7,5.972918630790018e-05,0.0004096746333904623 +123,SlogP_VSA8,5.7276153820953125e-05,0.0003928496061817456 +124,SlogP_VSA10,5.68323253096259e-05,0.0003898054447942303 +125,fr_Ndealkylation2,5.3256952175274214e-05,0.0003652824306935695 +126,PEOE_VSA13,5.1642682992206477e-05,0.00035421037067323525 +127,fr_Nhpyrrole,5.081261998635194e-05,0.00034851707768475426 +128,SlogP_VSA4,5.073281991533974e-05,0.0003479697394928705 +129,fr_Ar_OH,4.890003419453097e-05,0.00033539890327914673 +130,SlogP_VSA6,4.706037217792902e-05,0.00032278090345693784 +131,VSA_EState9,4.6167396548972675e-05,0.000316656101060778 +132,fr_piperzine,4.48255579833296e-05,0.0003074526068156822 +133,fr_SH,4.230460385665801e-05,0.0002901617140130499 +134,NumSaturatedRings,4.116740016739433e-05,0.0002823617834717601 +135,SlogP_VSA12,4.017560298465871e-05,0.00027555917703509474 +136,fr_morpholine,3.9407859811712555e-05,0.00027029332758431884 +137,fr_imidazole,3.684675756311129e-05,0.00025272706409359323 +138,fr_allylic_oxid,3.643650368836643e-05,0.0002499131867227092 +139,PEOE_VSA4,3.551536475093669e-05,0.00024359521589772755 +140,SMR_VSA9,3.3991363231079e-05,0.00023314228990747965 +141,PEOE_VSA5,3.0327565580477844e-05,0.00020801278367932707 +142,fr_alkyl_halide,2.7853537121390296e-05,0.00019104374785905835 +143,fr_Ar_NH,2.3231754885665335e-05,0.00015934355135427628 +144,fr_methoxy,1.8713023398167435e-05,0.00012835016637849958 +145,fr_Ndealkylation1,1.7955681698099167e-05,0.0001231556592622066 +146,fr_para_hydroxylation,1.652164422045547e-05,0.00011331978480556495 +147,fr_priamide,1.0969522257443902e-05,7.52385104682474e-05 +148,fr_aniline,8.837162541480941e-06,6.061293562129571e-05 +149,fr_bicyclic,8.769235192463492e-06,6.0147030868090844e-05 +150,fr_halogen,7.983085855608213e-06,5.47549359598141e-05 +151,fr_quatN,4.38489684534322e-06,3.0075430766974587e-05 +152,fr_ketone_Topliss,0.0,0.0 +153,fr_nitro_arom,0.0,0.0 +154,fr_nitro,0.0,0.0 +155,fr_nitrile,0.0,0.0 +156,fr_lactam,0.0,0.0 +157,fr_lactone,0.0,0.0 +158,fr_nitro_arom_nonortho,0.0,0.0 +159,fr_sulfone,0.0,0.0 +160,fr_nitroso,0.0,0.0 +161,fr_oxime,0.0,0.0 +162,fr_thiophene,0.0,0.0 +163,fr_phos_acid,0.0,0.0 +164,fr_term_acetylene,0.0,0.0 +165,fr_phos_ester,0.0,0.0 +166,fr_piperdine,0.0,0.0 +167,fr_thiocyan,0.0,0.0 +168,fr_prisulfonamd,0.0,0.0 +169,fr_pyridine,0.0,0.0 +170,fr_sulfide,0.0,0.0 +171,fr_tetrazole,0.0,0.0 +172,fr_sulfonamd,0.0,0.0 +173,fr_isothiocyan,0.0,0.0 +174,fr_ketone,0.0,0.0 +175,NumRadicalElectrons,0.0,0.0 +176,fr_isocyan,0.0,0.0 +177,fr_Al_COO,0.0,0.0 +178,fr_HOCCN,0.0,0.0 +179,fr_C_S,0.0,0.0 +180,fr_COO2,0.0,0.0 +181,fr_COO,0.0,0.0 +182,fr_Ar_COO,0.0,0.0 +183,fr_ArN,0.0,0.0 +184,NumSaturatedCarbocycles,0.0,0.0 +185,fr_imide,0.0,0.0 +186,NumAliphaticCarbocycles,0.0,0.0 +187,EState_VSA11,0.0,0.0 +188,SlogP_VSA9,0.0,0.0 +189,SlogP_VSA7,0.0,0.0 +190,SMR_VSA8,0.0,0.0 +191,SMR_VSA2,0.0,0.0 +192,fr_Imine,0.0,0.0 +193,fr_N_O,0.0,0.0 +194,fr_aldehyde,0.0,0.0 +195,fr_alkyl_carbamate,0.0,0.0 +196,fr_amidine,0.0,0.0 +197,fr_aryl_methyl,0.0,0.0 +198,fr_azide,0.0,0.0 +199,fr_azo,0.0,0.0 +200,fr_barbitur,0.0,0.0 +201,fr_benzodiazepine,0.0,0.0 +202,fr_diazo,0.0,0.0 +203,fr_dihydropyridine,0.0,0.0 +204,fr_epoxide,0.0,0.0 +205,fr_furan,0.0,0.0 +206,fr_guanido,0.0,0.0 +207,fr_hdrzine,0.0,0.0 +208,fr_hdrzone,0.0,0.0 +209,fr_urea,0.0,0.0 diff --git a/reports/feature_importance/desc/desc_importance_biodist_spleen.png b/reports/feature_importance/desc/desc_importance_biodist_spleen.png new file mode 100644 index 0000000..6c2bb21 Binary files /dev/null and b/reports/feature_importance/desc/desc_importance_biodist_spleen.png differ diff --git a/reports/feature_importance/token_importance_biodist_heart.csv b/reports/feature_importance/token_importance_biodist_heart.csv new file mode 100644 index 0000000..cd158c3 --- /dev/null +++ b/reports/feature_importance/token_importance_biodist_heart.csv @@ -0,0 +1,9 @@ +token,Integrated Gradients_raw,Integrated Gradients_normalized,gate_sigmoid +desc,0.003565494833867307,0.1951674721649977,0.5030443072319031 +mpnn,0.0035352216048565735,0.19351038111435986,0.5024935007095337 +maccs,0.0033419436527607417,0.18293076423272317,0.5030479431152344 +morgan,0.003287899124425829,0.17997248369357177,0.5045571327209473 +help,0.0015989603878636958,0.08752363179077756,0.49689680337905884 +exp,0.0014671291042487212,0.08030747258308228,0.5002157688140869 +comp,0.00145684211449858,0.07974438502321485,0.5007365345954895 +phys,1.540816108059627e-05,0.0008434093972727343,0.49989768862724304 diff --git a/reports/feature_importance/token_importance_biodist_heart.png b/reports/feature_importance/token_importance_biodist_heart.png new file mode 100644 index 0000000..4d2ba98 Binary files /dev/null and b/reports/feature_importance/token_importance_biodist_heart.png differ diff --git a/reports/feature_importance/token_importance_biodist_kidney.csv b/reports/feature_importance/token_importance_biodist_kidney.csv new file mode 100644 index 0000000..42a3705 --- /dev/null +++ b/reports/feature_importance/token_importance_biodist_kidney.csv @@ -0,0 +1,9 @@ +token,Integrated Gradients_raw,Integrated Gradients_normalized,gate_sigmoid +mpnn,0.0048559099571079505,0.19172898631538365,0.5024935007095337 +desc,0.004839143601445488,0.19106698961368168,0.5030443072319031 +maccs,0.004575290677921078,0.18064911654549684,0.5030479431152344 +morgan,0.00455865913776129,0.1799924428501718,0.5045571327209473 +help,0.0022256484525745635,0.0878766957121806,0.49689680337905884 +exp,0.002128977738724099,0.08405978433271738,0.5002157688140869 +comp,0.002122205807078399,0.08379240384146124,0.5007365345954895 +phys,2.1112056818827408e-05,0.0008335807889068271,0.49989768862724304 diff --git a/reports/feature_importance/token_importance_biodist_kidney.png b/reports/feature_importance/token_importance_biodist_kidney.png new file mode 100644 index 0000000..d91e089 Binary files /dev/null and b/reports/feature_importance/token_importance_biodist_kidney.png differ diff --git a/reports/feature_importance/token_importance_biodist_liver.csv b/reports/feature_importance/token_importance_biodist_liver.csv new file mode 100644 index 0000000..b6d80dd --- /dev/null +++ b/reports/feature_importance/token_importance_biodist_liver.csv @@ -0,0 +1,9 @@ +token,Integrated Gradients_raw,Integrated Gradients_normalized,gate_sigmoid +morgan,0.09780316888744485,0.18587602810007073,0.5045571327209473 +desc,0.09773031070292665,0.18573756029675845,0.5030443072319031 +mpnn,0.09623063655897192,0.1828874126328982,0.5024935007095337 +maccs,0.09577168710943411,0.18201517401579392,0.5030479431152344 +exp,0.048989938672256456,0.09310593226017691,0.5002157688140869 +help,0.04618960921225445,0.08778387446473049,0.49689680337905884 +comp,0.04303189396486456,0.08178260094023622,0.5007365345954895 +phys,0.000426946836545812,0.00081141728933502,0.49989768862724304 diff --git a/reports/feature_importance/token_importance_biodist_liver.png b/reports/feature_importance/token_importance_biodist_liver.png new file mode 100644 index 0000000..0a4effe Binary files /dev/null and b/reports/feature_importance/token_importance_biodist_liver.png differ diff --git a/reports/feature_importance/token_importance_biodist_lung.csv b/reports/feature_importance/token_importance_biodist_lung.csv new file mode 100644 index 0000000..f68b38c --- /dev/null +++ b/reports/feature_importance/token_importance_biodist_lung.csv @@ -0,0 +1,9 @@ +token,Integrated Gradients_raw,Integrated Gradients_normalized,gate_sigmoid +mpnn,0.02534164865578116,0.20432202544341246,0.5024935007095337 +desc,0.02522037199336683,0.20334420850497661,0.5030443072319031 +morgan,0.024225846008065377,0.19532564718589915,0.5045571327209473 +maccs,0.022470533439988064,0.18117309444288665,0.5030479431152344 +exp,0.010714971488376543,0.08639156460619887,0.5002157688140869 +comp,0.008086441757158726,0.06519852677682575,0.5007365345954895 +help,0.007875960603478175,0.06350148108647499,0.49689680337905884 +phys,9.220884607395201e-05,0.0007434519533255273,0.49989768862724304 diff --git a/reports/feature_importance/token_importance_biodist_lung.png b/reports/feature_importance/token_importance_biodist_lung.png new file mode 100644 index 0000000..9daeae4 Binary files /dev/null and b/reports/feature_importance/token_importance_biodist_lung.png differ diff --git a/reports/feature_importance/token_importance_biodist_lymph_nodes.csv b/reports/feature_importance/token_importance_biodist_lymph_nodes.csv new file mode 100644 index 0000000..b870560 --- /dev/null +++ b/reports/feature_importance/token_importance_biodist_lymph_nodes.csv @@ -0,0 +1,9 @@ +token,Integrated Gradients_raw,Integrated Gradients_normalized,gate_sigmoid +mpnn,0.008867993321820084,0.19070281729454297,0.5024935007095337 +desc,0.008645351299381949,0.18591498543830598,0.5030443072319031 +maccs,0.008442808000788875,0.181559368979869,0.5030479431152344 +morgan,0.008195466011645657,0.17624037375140086,0.5045571327209473 +help,0.0042255772506620244,0.09086942864674906,0.49689680337905884 +comp,0.004044422364805251,0.08697376185429058,0.5007365345954895 +exp,0.00404026382099498,0.08688433395423678,0.5002157688140869 +phys,3.97556488833418e-05,0.0008549300806047392,0.49989768862724304 diff --git a/reports/feature_importance/token_importance_biodist_lymph_nodes.png b/reports/feature_importance/token_importance_biodist_lymph_nodes.png new file mode 100644 index 0000000..5ef9725 Binary files /dev/null and b/reports/feature_importance/token_importance_biodist_lymph_nodes.png differ diff --git a/reports/feature_importance/token_importance_biodist_muscle.csv b/reports/feature_importance/token_importance_biodist_muscle.csv new file mode 100644 index 0000000..0e935d6 --- /dev/null +++ b/reports/feature_importance/token_importance_biodist_muscle.csv @@ -0,0 +1,9 @@ +token,Integrated Gradients_raw,Integrated Gradients_normalized,gate_sigmoid +desc,0.13433959473357746,0.18119593133256653,0.5030443072319031 +morgan,0.1341354503183032,0.18092058334208527,0.5045571327209473 +maccs,0.13308209800950063,0.17949983205134845,0.5030479431152344 +mpnn,0.13146415221946048,0.17731756259569512,0.5024935007095337 +help,0.07109125874805401,0.09588719441952036,0.49689680337905884 +exp,0.07106845419792793,0.09585643586537324,0.5002157688140869 +comp,0.06553188976274463,0.08838877191115811,0.5007365345954895 +phys,0.0006922414393679412,0.0009336884822529513,0.49989768862724304 diff --git a/reports/feature_importance/token_importance_biodist_muscle.png b/reports/feature_importance/token_importance_biodist_muscle.png new file mode 100644 index 0000000..87b1566 Binary files /dev/null and b/reports/feature_importance/token_importance_biodist_muscle.png differ diff --git a/reports/feature_importance/token_importance_biodist_spleen.csv b/reports/feature_importance/token_importance_biodist_spleen.csv new file mode 100644 index 0000000..e3c9021 --- /dev/null +++ b/reports/feature_importance/token_importance_biodist_spleen.csv @@ -0,0 +1,9 @@ +token,Integrated Gradients_raw,Integrated Gradients_normalized,gate_sigmoid +morgan,0.0604498204589427,0.19035905271412099,0.5045571327209473 +mpnn,0.05944027304270918,0.1871799450118123,0.5024935007095337 +desc,0.05922306966733361,0.18649596235530655,0.5030443072319031 +maccs,0.05836604891749294,0.18379716760525727,0.5030479431152344 +exp,0.0294058000902328,0.09260011373035326,0.5002157688140869 +help,0.025210135140292342,0.07938778655861305,0.49689680337905884 +comp,0.025193235727010176,0.07933456958824997,0.5007365345954895 +phys,0.0002684633315854155,0.0008454024362865825,0.49989768862724304 diff --git a/reports/feature_importance/token_importance_biodist_spleen.png b/reports/feature_importance/token_importance_biodist_spleen.png new file mode 100644 index 0000000..c2d9d46 Binary files /dev/null and b/reports/feature_importance/token_importance_biodist_spleen.png differ