From 3ed608953b48af49d98702231d2e0a637e69f528 Mon Sep 17 00:00:00 2001 From: FNTwin Date: Tue, 5 Dec 2023 21:59:37 +0000 Subject: [PATCH 1/8] Ad hoc en for tmqm --- src/openqdc/utils/atomization_energies.py | 583 ++++++++++++++++++++-- 1 file changed, 544 insertions(+), 39 deletions(-) diff --git a/src/openqdc/utils/atomization_energies.py b/src/openqdc/utils/atomization_energies.py index 6a1a638..6e154ea 100644 --- a/src/openqdc/utils/atomization_energies.py +++ b/src/openqdc/utils/atomization_energies.py @@ -451,48 +451,553 @@ def get_matrix(level_of_theory: str) -> np.ndarray: } -# tpssh/def2-tzvp +# tpssh/def2-tzvp #TPSSH-D3BJ2B/def2-SVP TMQM = { - ("H", -1): -0.5066148831768739, - ("H", 0): -0.4998936035891093, + ("H", -6): 14.217092939231382, + ("H", -5): 9.53974805964775, + ("H", -4): 5.433414842896548, + ("H", -3): 2.1119440739229214, + ("H", -1): -0.4881933665181651, + ("H", 0): -0.49932136885281153, ("H", 1): 0.0, - ("Li", 1): -7.285942861425713, - ("B", -3): -24.011884397333016, - ("B", -1): -24.671478908940745, - ("B", 0): -24.66555991803692, - ("B", 3): -22.03729209090186, - ("C", -1): -37.902383828698945, - ("C", 0): -37.8619600939805, - ("C", 1): -37.44108173595555, - ("N", -1): -54.58878376740317, - ("N", 0): -54.61011499135528, - ("N", 1): -54.07150720832228, - ("O", -1): -75.12797596615384, - ("O", 0): -75.0993524949928, - ("O", 1): -74.58770047919643, - ("F", -1): -99.86387164958151, - ("F", 0): -99.76596802854195, - ("Na", 1): -162.0916076478938, - ("Mg", 2): -199.24528576913457, - ("Si", 4): -285.59703939232946, - ("Si", 0): -289.3842044105128, - ("Si", -4): -288.1798768489279, - ("P", 0): -341.2798907965112, - ("P", 1): -340.89320025019333, - ("S", -1): -398.19525449701325, - ("S", 0): -398.130358877624, - ("S", 1): -397.7467993687058, - ("Cl", -2): -459.4908872312368, - ("Cl", -1): -460.28412127843484, - ("Cl", 0): -460.1641720279233, - ("Cl", 2): -458.485405333257, - ("K", 1): -599.7644436257333, - ("Ca", 2): -676.9154959968483, - ("Br", -1): -2574.1448096288846, - ("Br", 0): -2574.0232838745055, - ("I", -1): -297.70580680306847, - ("I", 0): -297.5887657326151, + ("B", -6): -20.421183965485852, + ("B", -5): -21.920626428593025, + ("B", -4): -22.962664498156805, + ("B", -3): -23.73349564958635, + ("B", -2): -24.306000788732604, + ("B", -1): -24.58548934982911, + ("B", 0): -24.637348411950693, + ("B", 1): -24.312496471473946, + ("B", 2): -23.4108738232491, + ("B", 3): -22.026461412537948, + ("B", 4): -12.44455196326006, + ("C", -6): -31.895373279742977, + ("C", -5): -33.9506463543837, + ("C", -4): -35.656553844452446, + ("C", -3): -36.69214503581623, + ("C", -2): -37.362069416465516, + ("C", -1): -37.81335057610834, + ("C", 0): -37.74581636768891, + ("C", 1): -37.39449062780137, + ("C", 2): -36.47046673082783, + ("C", 3): -34.74740633163261, + ("C", 4): -32.3971471660242, + ("C", 5): -17.940892699105923, + ("N", -6): -46.268339962295215, + ("N", -5): -49.117131020755544, + ("N", -4): -51.32537250307082, + ("N", -3): -53.1394229178753, + ("N", -2): -54.00563745554605, + ("N", -1): -54.39724479097468, + ("N", 0): -54.433552070223456, + ("N", 1): -53.904318947855266, + ("N", 2): -52.902918072844685, + ("N", 3): -51.1230205834063, + ("N", 4): -48.32975315811246, + ("N", 5): -44.766857762131444, + ("N", 6): -24.43628263625023, + ("O", -6): -64.16089634959765, + ("O", -5): -67.63170932536735, + ("O", -4): -70.44733822607806, + ("O", -3): -72.69043185216711, + ("O", -2): -74.49494376979466, + ("O", -1): -74.98766067447303, + ("O", 0): -74.89255194766493, + ("O", 1): -74.44165308621425, + ("O", 2): -73.06387655761876, + ("O", 3): -71.15693295238583, + ("O", 4): -68.26976439456558, + ("O", 5): -64.15871487985697, + ("O", 6): -59.135565014771096, + ("F", -6): -85.61158654605235, + ("F", -5): -89.58875709918351, + ("F", -4): -93.0053772425132, + ("F", -3): -95.76140511583853, + ("F", -2): -97.96750937278892, + ("F", -1): -99.68116358159514, + ("F", 0): -99.63798794044517, + ("F", 1): -98.86140542715097, + ("F", 2): -97.51156856463808, + ("F", 3): -95.2225215237081, + ("F", 4): -92.15309260335772, + ("F", 5): -87.90912397062483, + ("F", 6): -82.23308985743904, + ("Si", -6): -285.1799937308777, + ("Si", -5): -286.6172847058642, + ("Si", -4): -287.79026534707845, + ("Si", -3): -288.53191795266656, + ("Si", -2): -288.9994524077356, + ("Si", -1): -289.30338149908283, + ("Si", 0): -289.2383527972283, + ("Si", 1): -288.9846941105963, + ("Si", 2): -288.3781150254342, + ("Si", 3): -287.1591354481736, + ("Si", 4): -285.50925741472344, + ("Si", 5): -279.30789983898103, + ("Si", 6): -271.30390829058473, + ("P", -6): -335.53595307220115, + ("P", -5): -337.6945574727502, + ("P", -4): -339.13281661433933, + ("P", -3): -340.2874563462647, + ("P", -2): -340.863292209629, + ("P", -1): -341.1026931040225, + ("P", 0): -341.09451235922893, + ("P", 1): -340.7155374221294, + ("P", 2): -340.05019255182657, + ("P", 3): -338.9351163349605, + ("P", 4): -337.06106610753034, + ("P", 5): -334.68481631744487, + ("P", 6): -326.51173123115, + ("S", -6): -390.5784020593457, + ("S", -5): -393.1858499940331, + ("S", -4): -395.31729025950784, + ("S", -3): -396.68365026043057, + ("S", -2): -397.7422883113922, + ("S", -1): -398.03276257329304, + ("S", 0): -397.9253873123578, + ("S", 1): -397.522795253734, + ("S", 2): -396.6712118235141, + ("S", 3): -395.470717301252, + ("S", 4): -393.72753783666093, + ("S", 5): -391.08206624234487, + ("S", 6): -387.86503025737534, + ("Cl", -6): -450.3551787713087, + ("Cl", -5): -453.3211487176432, + ("Cl", -4): -455.871646932377, + ("Cl", -3): -457.9512132074125, + ("Cl", -2): -459.1913966706105, + ("Cl", -1): -460.09570657841857, + ("Cl", 0): -460.005267036287, + ("Cl", 1): -459.4470878162211, + ("Cl", 2): -458.53955262427075, + ("Cl", 3): -457.0971473617074, + ("Cl", 4): -455.2405360625723, + ("Cl", 5): -452.75059639963706, + ("Cl", 6): -449.21921809416574, + ("Sc", -6): -757.4518370997483, + ("Sc", -5): -758.4460566976777, + ("Sc", -4): -759.2970821606076, + ("Sc", -3): -759.9002569435896, + ("Sc", -2): -760.2733552389665, + ("Sc", -1): -760.4886803225745, + ("Sc", 0): -760.5214470970552, + ("Sc", 1): -760.2593555469751, + ("Sc", 2): -759.8233486786779, + ("Sc", 3): -758.8712941423976, + ("Sc", 4): -756.0915570065674, + ("Sc", 5): -752.4812502361659, + ("Sc", 6): -748.3549984464066, + ("Ti", -6): -845.877971817033, + ("Ti", -5): -847.0083728833423, + ("Ti", -4): -847.9024743012754, + ("Ti", -3): -848.5448896250384, + ("Ti", -2): -848.9183407833955, + ("Ti", -1): -849.2054897066263, + ("Ti", 0): -849.1688717721736, + ("Ti", 1): -848.9878595089176, + ("Ti", 2): -848.4336809964936, + ("Ti", 3): -847.4626495665269, + ("Ti", 4): -845.7813898855254, + ("Ti", 5): -842.011543007215, + ("Ti", 6): -837.3426859924365, + ("V", -6): -940.178527506359, + ("V", -5): -941.36680834942, + ("V", -4): -942.3129907630023, + ("V", -3): -942.9562885518893, + ("V", -2): -943.4308412125442, + ("V", -1): -943.6771718004992, + ("V", 1): -943.4482869898394, + ("V", 2): -942.9322435731367, + ("V", 3): -941.7985542135455, + ("V", 4): -940.0944320542314, + ("V", 5): -937.5430391724902, + ("V", 6): -932.6683263906164, + ("Cr", -6): -1040.4731136899904, + ("Cr", -5): -1041.8114857614405, + ("Cr", -4): -1042.7003382868302, + ("Cr", -3): -1043.4563382654667, + ("Cr", -2): -1043.8556446891687, + ("Cr", -1): -1044.1888456182858, + ("Cr", 0): -1044.1167922331992, + ("Cr", 1): -1043.8985937353898, + ("Cr", 2): -1043.3058940297103, + ("Cr", 3): -1042.1512730959923, + ("Cr", 4): -1040.216730875938, + ("Cr", 5): -1037.59851076094, + ("Cr", 6): -1034.0567831780008, + ("Mn", -6): -1146.80381465019, + ("Mn", -5): -1148.0358596249296, + ("Mn", -4): -1149.1586690857089, + ("Mn", -3): -1149.889299104998, + ("Mn", -2): -1150.3667364933947, + ("Mn", -1): -1150.6272312694484, + ("Mn", 0): -1150.690451314217, + ("Mn", 1): -1150.3857653106045, + ("Mn", 2): -1149.8043933632885, + ("Mn", 3): -1148.5393992632821, + ("Mn", 4): -1146.64997710289, + ("Mn", 5): -1143.8471220282531, + ("Mn", 6): -1140.2382524792695, + ("Fe", -5): -1260.6298780394447, + ("Fe", -4): -1261.7336773632696, + ("Fe", -3): -1262.5594263852918, + ("Fe", -2): -1263.0208246846253, + ("Fe", -1): -1263.4116328242426, + ("Fe", 0): -1263.3512507819016, + ("Fe", 1): -1263.1276557824633, + ("Fe", 2): -1262.48411351784, + ("Fe", 3): -1261.2787531459567, + ("Fe", 4): -1259.2201344354494, + ("Fe", 5): -1256.3735984818406, + ("Fe", 6): -1252.4858633213635, + ("Co", -6): -1378.248634792311, + ("Co", -5): -1379.5848614724607, + ("Co", -4): -1380.7782517279618, + ("Co", -3): -1381.5051720930724, + ("Co", -2): -1382.1625509120943, + ("Co", -1): -1382.4008244615416, + ("Co", 0): -1382.4858194313604, + ("Co", 1): -1382.1475227454841, + ("Co", 2): -1381.5412117950295, + ("Co", 3): -1380.2065561071115, + ("Co", 4): -1378.1546327751068, + ("Co", 5): -1375.1717646951852, + ("Co", 6): -1371.2468515424805, + ("Ni", -6): -1503.6034570040808, + ("Ni", -5): -1505.0285591577751, + ("Ni", -3): -1507.0892521643116, + ("Ni", -2): -1507.633918535823, + ("Ni", -1): -1508.0320423257854, + ("Ni", 0): -1508.0248389973694, + ("Ni", 1): -1507.768992654911, + ("Ni", 2): -1507.0126735182846, + ("Ni", 3): -1505.7596295630663, + ("Ni", 4): -1503.461888519249, + ("Ni", 5): -1500.489295526536, + ("Ni", 6): -1496.4139265280396, + ("Cu", -6): -1635.9908985279567, + ("Cu", -4): -1638.6070971095817, + ("Cu", -3): -1639.4873290605722, + ("Cu", -2): -1639.9686896965115, + ("Cu", -1): -1640.249152902949, + ("Cu", 0): -1640.2298564634566, + ("Cu", 1): -1639.9654540534657, + ("Cu", 2): -1639.2211147162361, + ("Cu", 3): -1637.6960709822747, + ("Cu", 4): -1635.5670497621793, + ("Cu", 5): -1632.2854107447433, + ("Cu", 6): -1628.1130639768749, + ("Zn", -5): -1775.921885477983, + ("Zn", -4): -1777.0969011233067, + ("Zn", -3): -1778.0519867732373, + ("Zn", -2): -1778.6570897496686, + ("Zn", -1): -1778.966734039045, + ("Zn", 0): -1779.114058351904, + ("Zn", 1): -1778.7925895618753, + ("Zn", 2): -1778.1482787095524, + ("Zn", 3): -1776.6771770437067, + ("Zn", 4): -1774.3971558407582, + ("Zn", 5): -1771.1886345592475, + ("Zn", 6): -1766.804411239567, + ("As", -6): -2230.577881365602, + ("As", -5): -2232.443018432056, + ("As", -4): -2233.7000568978974, + ("As", -3): -2234.7108209970634, + ("As", -2): -2235.225763197557, + ("As", -1): -2235.435630139579, + ("As", 0): -2235.4591433174783, + ("As", 1): -2235.060564986174, + ("As", 2): -2234.4363747418793, + ("As", 3): -2233.3999180939004, + ("As", 4): -2231.5987559382424, + ("As", 5): -2229.3218563189944, + ("As", 6): -2224.838308358153, + ("Se", -6): -2395.1077016185045, + ("Se", -5): -2397.193309933493, + ("Se", -4): -2398.94011555639, + ("Se", -3): -2400.0712613985743, + ("Se", -2): -2400.9404693865185, + ("Se", -1): -2401.1799305754835, + ("Se", 0): -2401.068339968518, + ("Se", 1): -2400.742219528764, + ("Se", 2): -2399.916304261895, + ("Se", 3): -2398.825518203835, + ("Se", 4): -2397.2561555649268, + ("Se", 5): -2394.8105066058793, + ("Se", 6): -2391.845011862833, + ("Br", -6): -2566.305971704081, + ("Br", -4): -2570.5591180385823, + ("Br", -3): -2572.164670935644, + ("Br", -2): -2573.130827588049, + ("Br", -1): -2573.8171880991854, + ("Br", 0): -2573.7141695947707, + ("Br", 2): -2572.451811210953, + ("Br", 3): -2571.1158715982597, + ("Br", 4): -2569.476929606877, + ("Br", 5): -2567.2985159627506, + ("Br", 6): -2564.137537390847, + ("Y", -6): -36.271770209787064, + ("Y", -5): -36.92073617126403, + ("Y", -4): -37.418930941437246, + ("Y", -3): -37.81201291199269, + ("Y", -2): -38.06775251670872, + ("Y", -1): -38.195186791777395, + ("Y", 0): -38.19535824808211, + ("Y", 1): -37.95592765775424, + ("Y", 2): -37.52384619556868, + ("Y", 3): -36.768358768379294, + ("Y", 4): -34.52443835899878, + ("Y", 6): -28.345155043719103, + ("Zr", -6): -44.84396719954361, + ("Zr", -5): -45.543429638466996, + ("Zr", -4): -46.068033147456624, + ("Zr", -3): -46.468782934249155, + ("Zr", -2): -46.72405867777039, + ("Zr", -1): -46.86045996850077, + ("Zr", 0): -46.81440666319012, + ("Zr", 1): -46.620247674457886, + ("Zr", 2): -46.09609781617706, + ("Zr", 3): -45.29776597316922, + ("Zr", 4): -44.033950648813885, + ("Zr", 5): -41.042993929434964, + ("Nb", -6): -54.563157891740275, + ("Nb", -5): -55.28927328584156, + ("Nb", -4): -55.892112805536684, + ("Nb", -3): -56.32465304202445, + ("Nb", -2): -56.60682692201728, + ("Nb", -1): -56.73606107169563, + ("Nb", 0): -56.75919106003537, + ("Nb", 1): -56.461091196502004, + ("Nb", 2): -55.98789715521299, + ("Nb", 3): -55.019998545045866, + ("Nb", 4): -53.69589155125546, + ("Nb", 5): -51.8368959549036, + ("Nb", 6): -48.02309636855343, + ("Mo", -6): -65.68262766787593, + ("Mo", -5): -66.43863632604713, + ("Mo", -4): -67.05100303704401, + ("Mo", -3): -67.52201445732598, + ("Mo", -2): -67.82341196143874, + ("Mo", -1): -67.97576198642685, + ("Mo", 0): -67.90193130970466, + ("Mo", 1): -67.67151053636739, + ("Mo", 2): -67.10876168647336, + ("Mo", 3): -66.14016337048571, + ("Mo", 4): -64.63760500541133, + ("Mo", 5): -62.69965735285641, + ("Mo", 6): -60.17149898917262, + ("Tc", -6): -78.24291321902125, + ("Tc", -4): -79.67167486240695, + ("Tc", -3): -80.02177614260515, + ("Tc", -2): -80.40585912736145, + ("Tc", -1): -80.5709190323354, + ("Tc", 0): -80.55971325845906, + ("Tc", 1): -80.24795459984225, + ("Tc", 2): -79.68221068482787, + ("Tc", 3): -78.62824258437635, + ("Tc", 4): -77.08605637898086, + ("Tc", 5): -74.94060384427543, + ("Tc", 6): -72.31832290307409, + ("Ru", -5): -93.06413042789033, + ("Ru", -4): -93.7296526698646, + ("Ru", -3): -94.20913184929866, + ("Ru", -2): -94.53030624589103, + ("Ru", -1): -94.70272952813828, + ("Ru", 0): -94.64322954332718, + ("Ru", 1): -94.40421228439834, + ("Ru", 2): -93.74018556934499, + ("Ru", 3): -92.64024584330552, + ("Ru", 4): -90.97693302017467, + ("Ru", 5): -88.76745246513465, + ("Ru", 6): -85.89706374409035, + ("Rh", -6): -107.97687581473977, + ("Rh", -5): -108.76981824821871, + ("Rh", -4): -109.41173186328163, + ("Rh", -3): -109.90131686977266, + ("Rh", -2): -110.24136134144607, + ("Rh", -1): -110.42455721953728, + ("Rh", 0): -110.40378753655074, + ("Rh", 1): -110.06191210860608, + ("Rh", 2): -109.43554661179566, + ("Rh", 3): -108.18753350877438, + ("Rh", 4): -106.47131214491833, + ("Rh", 5): -104.06015767706262, + ("Rh", 6): -101.07942885640458, + ("Pd", -6): -125.40077609670448, + ("Pd", -5): -126.15663189962221, + ("Pd", -4): -126.78639397592143, + ("Pd", -3): -127.25492045545673, + ("Pd", -2): -127.60483676570557, + ("Pd", -1): -127.78570613446139, + ("Pd", 0): -127.78086369804826, + ("Pd", 1): -127.4768668081393, + ("Pd", 2): -126.67568470504608, + ("Pd", 3): -125.45674595439053, + ("Pd", 4): -123.59508338117145, + ("Pd", 5): -121.13049820069097, + ("Pd", 6): -117.94515276650662, + ("Ag", -6): -144.39334316873845, + ("Ag", -4): -145.85306358383858, + ("Ag", -3): -146.3274656435732, + ("Ag", -2): -146.699918219871, + ("Ag", -1): -146.90993265165818, + ("Ag", 0): -146.87649139325026, + ("Ag", 1): -146.59627974618712, + ("Ag", 2): -145.80605013291836, + ("Ag", 3): -144.49038756005856, + ("Ag", 4): -142.57677311237106, + ("Ag", 5): -139.87624438648987, + ("Ag", 6): -136.66083770943845, + ("Cd", -6): -164.906717746825, + ("Cd", -5): -165.7832543695694, + ("Cd", -4): -166.49618163659363, + ("Cd", -3): -167.03098007552236, + ("Cd", -2): -167.40186209415344, + ("Cd", -1): -167.63134551777608, + ("Cd", 0): -167.67117047917543, + ("Cd", 1): -167.3486889896838, + ("Cd", 2): -166.72343179087278, + ("Cd", 3): -165.3468632359542, + ("Cd", 4): -163.3697556829444, + ("Cd", 5): -160.73054957062402, + ("Cd", 6): -157.23510711771647, + ("I", -6): -291.76167938142703, + ("I", -5): -293.54143742779524, + ("I", -4): -295.0388926322482, + ("I", -3): -296.2638218278227, + ("I", -2): -297.0982975298163, + ("I", -1): -297.68752386389065, + ("I", 0): -297.5797705297976, + ("I", 2): -296.4707741750163, + ("I", 4): -293.91227357755355, + ("I", 5): -292.05762346352446, + ("I", 6): -289.31776543199595, + ("La", -6): -29.743525858784448, + ("La", -5): -30.317188282790234, + ("La", -4): -30.78047206839345, + ("La", -3): -31.13084827019753, + ("La", -2): -31.34504296675372, + ("La", -1): -31.459180300111893, + ("La", 0): -31.473219525909958, + ("La", 1): -31.2466832516279, + ("La", 2): -30.862849855496297, + ("La", 3): -30.168840964756807, + ("La", 4): -28.29746458322268, + ("La", 6): -23.18956374594004, + ("Hf", -6): -45.702324092873674, + ("Hf", -5): -46.41976209817525, + ("Hf", -4): -46.96178056212097, + ("Hf", -3): -47.39287887991919, + ("Hf", -2): -47.675647388133854, + ("Hf", -1): -47.815350726895645, + ("Hf", 0): -47.7927448155551, + ("Hf", 1): -47.59546068616233, + ("Hf", 2): -47.032606117317286, + ("Hf", 3): -46.247688116590716, + ("Hf", 4): -45.0407838798737, + ("Hf", 5): -42.032388261322964, + ("Ta", -6): -54.37502502110377, + ("Ta", -5): -55.1386579395384, + ("Ta", -4): -55.81443895120174, + ("Ta", -3): -56.30206989453955, + ("Ta", -2): -56.601046826201454, + ("Ta", -1): -56.76172060020315, + ("Ta", 0): -56.792078095978056, + ("Ta", 1): -56.50595767906413, + ("Ta", 2): -55.96853101344914, + ("Ta", 3): -55.051628632379874, + ("Ta", 4): -53.79788079283529, + ("Ta", 5): -52.05390271791209, + ("Ta", 6): -48.29539248877423, + ("W", -6): -64.39855316588778, + ("W", -5): -65.24441945353799, + ("W", -4): -65.8010874700245, + ("W", -3): -66.38146269974948, + ("W", -2): -66.64871137061746, + ("W", -1): -66.85086830899256, + ("W", 0): -66.80859012118346, + ("W", 2): -65.99059484566425, + ("W", 3): -65.02817223968405, + ("W", 4): -63.617834084774046, + ("W", 5): -61.81628780267842, + ("W", 6): -59.47952795003247, + ("Re", -6): -75.45333740093768, + ("Re", -5): -76.26410454636374, + ("Re", -4): -77.00677237958723, + ("Re", -3): -77.49516079133471, + ("Re", -2): -77.86355391446041, + ("Re", -1): -78.04858121948739, + ("Re", 0): -78.0234349915115, + ("Re", 1): -77.68787177760888, + ("Re", 2): -77.11526977113783, + ("Re", 3): -76.08858057383813, + ("Re", 4): -74.66669252442837, + ("Re", 5): -72.69913557050401, + ("Re", 6): -70.28964931505236, + ("Os", -6): -87.52930714769049, + ("Os", -5): -88.52916555552326, + ("Os", -4): -89.31150823294084, + ("Os", -3): -89.88363978619724, + ("Os", -2): -90.260416698843, + ("Os", -1): -90.49437189014766, + ("Os", 0): -90.42576450549853, + ("Os", 1): -90.15503898967641, + ("Os", 2): -89.48728233357622, + ("Os", 3): -88.52064428420316, + ("Os", 4): -86.91440925803343, + ("Os", 5): -84.88976015489344, + ("Os", 6): -82.26924956935397, + ("Ir", -6): -101.30302826888877, + ("Ir", -5): -102.22232589052359, + ("Ir", -4): -103.00806168769438, + ("Ir", -3): -103.5917952098325, + ("Ir", -2): -104.00155750199649, + ("Ir", -1): -104.18698533291567, + ("Ir", 0): -104.18212920288903, + ("Ir", 1): -103.80076855263967, + ("Ir", 2): -103.15917295496453, + ("Ir", 3): -102.0377258049875, + ("Ir", 4): -100.45791646207557, + ("Ir", 5): -98.31335577772536, + ("Ir", 6): -95.64239631799757, + ("Pt", -6): -116.44150060626156, + ("Pt", -5): -117.35152029981751, + ("Pt", -3): -118.64569231731156, + ("Pt", -2): -119.07493348048287, + ("Pt", -1): -119.28395194384188, + ("Pt", 0): -119.19898314494726, + ("Pt", 1): -118.88817763171755, + ("Pt", 2): -118.17699294981111, + ("Pt", 3): -117.01416494545127, + ("Pt", 4): -115.2249098649729, + ("Pt", 5): -113.02720123979448, + ("Pt", 6): -110.20069996745082, + ("Au", -6): -132.7582845534032, + ("Au", -4): -134.46966688446687, + ("Au", -3): -135.0308104887005, + ("Au", -2): -135.47194383022386, + ("Au", -1): -135.7254446488866, + ("Au", 0): -135.65814857835585, + ("Au", 1): -135.3182379698876, + ("Au", 2): -134.55872819229756, + ("Au", 3): -133.33593424822286, + ("Au", 4): -131.59790403972784, + ("Au", 5): -129.17553810230254, + ("Au", 6): -126.31586641391426, + ("Hg", -6): -150.16795785032326, + ("Hg", -5): -151.2023599700243, + ("Hg", -3): -152.70285712633589, + ("Hg", -2): -153.12030207317588, + ("Hg", -1): -153.40069599454648, + ("Hg", 0): -153.4646356814841, + ("Hg", 1): -153.09033736226763, + ("Hg", 2): -152.39892757532584, + ("Hg", 3): -151.11053801877802, + ("Hg", 4): -149.2965421401237, + ("Hg", 5): -146.9091447381117, + ("Hg", 6): -143.83528053924022, } + # "wb97m-d3bj/def2-TZVPPD" SPICE = { ("H", -1): -0.5027370838426788, From 87db7342918c50a26741ac4f05fcdc825b4dee6c Mon Sep 17 00:00:00 2001 From: FNTwin Date: Thu, 7 Dec 2023 11:49:01 -0700 Subject: [PATCH 2/8] Added atoms_per_molecules, unit transition1x, fixed dummy class --- src/openqdc/datasets/base.py | 10 ++++++++++ src/openqdc/datasets/dummy.py | 9 +++++++-- src/openqdc/datasets/transition1x.py | 4 ++++ 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/src/openqdc/datasets/base.py b/src/openqdc/datasets/base.py index d7c8234..92c62e8 100644 --- a/src/openqdc/datasets/base.py +++ b/src/openqdc/datasets/base.py @@ -253,6 +253,16 @@ def data_shapes(self): "forces": (-1, 3, len(self.force_target_names)), } + @property + def atoms_per_molecules(self): + try: + if hasattr(self, "_n_atoms"): + return self._n_atoms + self._n_atoms = self.data["n_atoms"] + return self._n_atoms + except: # noqa + return None + def _set_units(self, en, ds): old_en, old_ds = self.energy_unit, self.distance_unit if en is not None: diff --git a/src/openqdc/datasets/dummy.py b/src/openqdc/datasets/dummy.py index 99b5106..6072703 100644 --- a/src/openqdc/datasets/dummy.py +++ b/src/openqdc/datasets/dummy.py @@ -23,7 +23,7 @@ class Dummy(BaseDataset): force_target_names = [f"forces{i}" for i in range(len(__force_methods__))] __isolated_atom_energies__ = [] - __average_n_atoms__ = 20 + __average_n_atoms__ = None @property def _stats(self): @@ -51,6 +51,11 @@ def __init__(self, energy_unit=None, distance_unit=None, cache_dir=None) -> None except: # noqa pass self._set_isolated_atom_energies() + self.setup_dummy() + + def setup_dummy(self): + self._n_atoms = np.array([np.random.randint(1, 100) for _ in range(self.__len__())]) + self.__average_nb_atoms__ = self._n_atoms.mean() def is_preprocessed(self): return True @@ -63,7 +68,7 @@ def __len__(self): def __getitem__(self, idx: int): shift = IsolatedAtomEnergyFactory.max_charge - size = np.random.randint(1, 100) + size = self._n_atoms[idx] z = np.random.randint(1, 100, size) c = np.random.randint(-1, 2, size) return Bunch( diff --git a/src/openqdc/datasets/transition1x.py b/src/openqdc/datasets/transition1x.py index 56ae7e6..0285ec9 100644 --- a/src/openqdc/datasets/transition1x.py +++ b/src/openqdc/datasets/transition1x.py @@ -55,6 +55,10 @@ class Transition1X(BaseDataset): "wB97x_6-31G(d).forces", ] + __energy_unit__ = "hartree" + __distance_unit__ = "ang" + __forces_unit__ = "hartree/ang" + def read_raw_entries(self): raw_path = p_join(self.root, "Transition1x.h5") f = load_hdf5_file(raw_path)["data"] From 61bb40c174081bd24c0eccad1eb4cce12d3240d9 Mon Sep 17 00:00:00 2001 From: Nikhil Shenoy Date: Mon, 15 Jan 2024 18:21:52 +0000 Subject: [PATCH 3/8] Updated dummy dataset --- src/openqdc/datasets/dummy.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/openqdc/datasets/dummy.py b/src/openqdc/datasets/dummy.py index 6072703..c87e03d 100644 --- a/src/openqdc/datasets/dummy.py +++ b/src/openqdc/datasets/dummy.py @@ -30,15 +30,15 @@ def _stats(self): return { "formation": { "energy": { - "mean": array([-12.94348027, -9.83037297]), - "std": array([4.39971409, 3.3574188]), + "mean": array([[-12.94348027, -9.83037297]]), + "std": array([[4.39971409, 3.3574188]]), }, "forces": NOT_DEFINED, }, "total": { "energy": { - "mean": array([-89.44242, -1740.5336]), - "std": array([29.599571, 791.48663]), + "mean": array([[-89.44242, -1740.5336]]), + "std": array([[29.599571, 791.48663]]), }, "forces": NOT_DEFINED, }, From 14d3e85fea21cb1b11e27988619cbf67d081652e Mon Sep 17 00:00:00 2001 From: prtos Date: Sat, 20 Jan 2024 22:22:16 -0500 Subject: [PATCH 4/8] Update gcp path --- src/openqdc/utils/io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/openqdc/utils/io.py b/src/openqdc/utils/io.py index 6105d93..12e2f95 100644 --- a/src/openqdc/utils/io.py +++ b/src/openqdc/utils/io.py @@ -43,7 +43,7 @@ def get_local_cache() -> str: def get_remote_cache() -> str: - remote_cache = "gs://opendatasets/openqdc" + remote_cache = "gs://qmdata-public/openqdc" return remote_cache From feb3671ded89c3e55260280cd7dc22026589a57f Mon Sep 17 00:00:00 2001 From: Nikhil Shenoy Date: Mon, 22 Jan 2024 16:39:39 +0530 Subject: [PATCH 5/8] removed torch-dependency --- README.md | 13 ++++++------- env.yml | 19 ------------------- src/openqdc/datasets/base.py | 3 +-- src/openqdc/utils/__init__.py | 2 -- src/openqdc/utils/io.py | 16 ---------------- 5 files changed, 7 insertions(+), 46 deletions(-) diff --git a/README.md b/README.md index b3f769a..c143e95 100644 --- a/README.md +++ b/README.md @@ -2,16 +2,15 @@ Open Quantum Data Commons -## Setup Datasets - -Use the scripts in `setup/` to download the datasets. For more information, see the [README](setup/README.md) in the `setup/` directory. - -# Install the library in dev mode +### Installing openQDC +```bash +git clone git@github.com:OpenDrugDiscovery/openQDC.git +cd openQDC +# use mamba/conda +mamba env create -n openqdc -f env.yml pip install -e . ``` -## Development lifecycle - ### Tests You can run tests locally with: diff --git a/env.yml b/env.yml index eefbd3a..a88b919 100644 --- a/env.yml +++ b/env.yml @@ -8,39 +8,22 @@ dependencies: - tqdm - loguru - fsspec - - s3fs - gcsfs - - joblib - - prettytable - - pyrootutils # Scientific - pandas - numpy - - scipy - - sympy # Chem - - ipdb - datamol #==0.9.0 - rdkit #-pypi #==2022.9.3 - ase - - # ML - #- einops =0.6.0 - - pytorch - dscribe # other stuffs - h5py >=3.8.0 - gdown #==4.6.4 - # Viz - - matplotlib - - seaborn - - ipywidgets - - nglview - # Dev - pytest >=6.0 - pytest-cov @@ -49,8 +32,6 @@ dependencies: - jupyterlab - pre-commit - ruff - - ipykernel - - isort # Doc - mkdocs diff --git a/src/openqdc/datasets/base.py b/src/openqdc/datasets/base.py index d7c8234..29b4646 100644 --- a/src/openqdc/datasets/base.py +++ b/src/openqdc/datasets/base.py @@ -6,7 +6,6 @@ import numpy as np import pandas as pd -import torch from ase.io.extxyz import write_extxyz from loguru import logger from sklearn.utils import Bunch @@ -82,7 +81,7 @@ def read_qc_archive_h5( return samples -class BaseDataset(torch.utils.data.Dataset): +class BaseDataset: __energy_methods__ = [] __force_methods__ = [] energy_target_names = [] diff --git a/src/openqdc/utils/__init__.py b/src/openqdc/utils/__init__.py index aeb5321..bc05f90 100644 --- a/src/openqdc/utils/__init__.py +++ b/src/openqdc/utils/__init__.py @@ -6,7 +6,6 @@ load_hdf5_file, load_json, load_pkl, - load_torch, makedirs, save_pkl, set_cache_dir, @@ -19,7 +18,6 @@ "makedirs", "load_hdf5_file", "load_json", - "load_torch", "create_hdf5_file", "check_file", "set_cache_dir", diff --git a/src/openqdc/utils/io.py b/src/openqdc/utils/io.py index 6105d93..9c98059 100644 --- a/src/openqdc/utils/io.py +++ b/src/openqdc/utils/io.py @@ -5,7 +5,6 @@ import fsspec import h5py -import torch from ase.atoms import Atoms from fsspec.implementations.local import LocalFileSystem from gcsfs import GCSFileSystem @@ -70,21 +69,6 @@ def copy_exists(local_path): return os.path.exists(local_path) or gcp_filesys.exists(remote_path) -def load_torch_gcs(path): - """Loads torch file""" - # get file system - fs: GCSFileSystem = fsspec.filesystem("gs") - - # load from GCS - with fs.open(path, "rb") as fp: - return torch.load(fp) - - -def load_torch(path): - """Loads torch file""" - return torch.load(path) - - def makedirs_gcs(path, exist_ok=True): """Creates directory""" fs: GCSFileSystem = fsspec.filesystem("gs") From 2b88e107eb08a8812dd086a04f0984bff31f5726 Mon Sep 17 00:00:00 2001 From: FNTwin Date: Wed, 24 Jan 2024 09:18:15 -0700 Subject: [PATCH 6/8] New normalization energy + Fixes + Merge --- src/openqdc/datasets/base.py | 29 +++++++++++++++++------ src/openqdc/utils/atomization_energies.py | 1 + src/openqdc/utils/constants.py | 2 +- 3 files changed, 24 insertions(+), 8 deletions(-) diff --git a/src/openqdc/datasets/base.py b/src/openqdc/datasets/base.py index 92c62e8..a9c47ec 100644 --- a/src/openqdc/datasets/base.py +++ b/src/openqdc/datasets/base.py @@ -143,10 +143,18 @@ def _precompute_statistics(self, overwrite_local_cache: bool = False): logger.info("Loaded precomputed statistics") else: logger.info("Precomputing relevant statistics") - (formation_E_mean, formation_E_std, total_E_mean, total_E_std) = self._precompute_E() + ( + inter_E_mean, + inter_E_std, + formation_E_mean, + formation_E_std, + total_E_mean, + total_E_std, + ) = self._precompute_E() forces_dict = self._precompute_F() stats = { "formation": {"energy": {"mean": formation_E_mean, "std": formation_E_std}, "forces": forces_dict}, + "inter": {"energy": {"mean": inter_E_mean, "std": inter_E_std}, "forces": forces_dict}, "total": {"energy": {"mean": total_E_mean, "std": total_E_std}, "forces": forces_dict}, } with open(local_path, "wb") as f: @@ -162,7 +170,7 @@ def _precompute_E(self): s = np.array(self.data["atomic_inputs"][:, :2], dtype=int) s[:, 1] += IsolatedAtomEnergyFactory.max_charge matrixs = [matrix[s[:, 0], s[:, 1]] for matrix in self.__isolated_atom_energies__] - converted_energy_data = self.convert_energy(self.data["energies"]) + converted_energy_data = self.data["energies"] # calculation per molecule formation energy statistics E = [] for i, matrix in enumerate(matrixs): @@ -170,12 +178,16 @@ def _precompute_E(self): c[1:] = c[1:] - c[:-1] E.append(converted_energy_data[:, i] - c) E = np.array(E).T + inter_E_mean = np.nanmean(E / self.data["n_atoms"][:, None], axis=0) + inter_E_std = np.nanstd(E / self.data["n_atoms"][:, None], axis=0) formation_E_mean = np.nanmean(E, axis=0) formation_E_std = np.nanstd(E, axis=0) total_E_mean = np.nanmean(converted_energy_data, axis=0) total_E_std = np.nanstd(converted_energy_data, axis=0) return ( + np.atleast_2d(inter_E_mean), + np.atleast_2d(inter_E_std), np.atleast_2d(formation_E_mean), np.atleast_2d(formation_E_std), np.atleast_2d(total_E_mean), @@ -265,10 +277,13 @@ def atoms_per_molecules(self): def _set_units(self, en, ds): old_en, old_ds = self.energy_unit, self.distance_unit - if en is not None: - self.set_energy_unit(en) - if ds is not None: - self.set_distance_unit(ds) + en = en if en is not None else old_en + ds = ds if ds is not None else old_ds + + # if en is None: + self.set_energy_unit(en) + # if ds is not None: + self.set_distance_unit(ds) if self.__force_methods__: self.__forces_unit__ = self.energy_unit + "/" + self.distance_unit self.__class__.__fn_forces__ = get_conversion(old_en + "/" + old_ds, self.__forces_unit__) @@ -566,7 +581,7 @@ def get_statistics(self, normalization: str = "formation", return_none: bool = T """ Get the statistics of the dataset. normalization : str, optional - Type of energy, by default "formation", must be one of ["formation", "total"] + Type of energy, by default "formation", must be one of ["formation", "total", "inter"] return_none : bool, optional Whether to return None if the statistics for the forces are not available, by default True Otherwise, the statistics for the forces are set to 0.0 diff --git a/src/openqdc/utils/atomization_energies.py b/src/openqdc/utils/atomization_energies.py index 6e154ea..746ff66 100644 --- a/src/openqdc/utils/atomization_energies.py +++ b/src/openqdc/utils/atomization_energies.py @@ -606,6 +606,7 @@ def get_matrix(level_of_theory: str) -> np.ndarray: ("V", -3): -942.9562885518893, ("V", -2): -943.4308412125442, ("V", -1): -943.6771718004992, + ("V", 0): -943.5386343398394, ("V", 1): -943.4482869898394, ("V", 2): -942.9322435731367, ("V", 3): -941.7985542135455, diff --git a/src/openqdc/utils/constants.py b/src/openqdc/utils/constants.py index 9244637..7bf570b 100644 --- a/src/openqdc/utils/constants.py +++ b/src/openqdc/utils/constants.py @@ -6,7 +6,7 @@ BOHR2ANG = 0.52917721092 -POSSIBLE_NORMALIZATION = ["formation", "total"] +POSSIBLE_NORMALIZATION = ["formation", "total", "inter"] NOT_DEFINED = { "mean": None, From b5705ce625f46122e9ba95917e33c29eba3f038a Mon Sep 17 00:00:00 2001 From: fntwin Date: Tue, 30 Jan 2024 20:36:37 -0500 Subject: [PATCH 7/8] Updated filesystem and remote_cache for public download --- src/openqdc/utils/io.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/openqdc/utils/io.py b/src/openqdc/utils/io.py index d157ee6..b83ed61 100644 --- a/src/openqdc/utils/io.py +++ b/src/openqdc/utils/io.py @@ -10,7 +10,7 @@ from gcsfs import GCSFileSystem from rdkit.Chem import MolFromXYZFile -gcp_filesys = fsspec.filesystem("gs") +gcp_filesys = fsspec.filesystem("https") local_filesys = LocalFileSystem() _OPENQDC_CACHE_DIR = "~/.cache/openqdc" @@ -42,7 +42,8 @@ def get_local_cache() -> str: def get_remote_cache() -> str: - remote_cache = "gs://qmdata-public/openqdc" + #remote_cache = "gs://qmdata-public/openqdc" + remote_cache = "https://storage.googleapis.com/qmdata-public/openqdc" return remote_cache From 6d2500fad35a97b5afafc9243354b7e60e89e152 Mon Sep 17 00:00:00 2001 From: FNTwin Date: Tue, 6 Feb 2024 06:21:10 -0700 Subject: [PATCH 8/8] Fix writing and reading, spice read + pcqm init, os.environ --- src/openqdc/datasets/pcqm.py | 7 ------- src/openqdc/datasets/spice.py | 2 +- src/openqdc/utils/io.py | 22 ++++++++++++++-------- 3 files changed, 15 insertions(+), 16 deletions(-) diff --git a/src/openqdc/datasets/pcqm.py b/src/openqdc/datasets/pcqm.py index d1a344c..543c494 100644 --- a/src/openqdc/datasets/pcqm.py +++ b/src/openqdc/datasets/pcqm.py @@ -73,9 +73,6 @@ class PCQM_PM6(BaseDataset): __force_methods__ = [] force_target_names = [] - def __init__(self, energy_unit=None, distance_unit=None) -> None: - super().__init__(energy_unit=energy_unit, distance_unit=distance_unit) - @property def root(self): return p_join(get_local_cache(), "pubchemqc") @@ -161,8 +158,4 @@ def collate_and_save_list(self, list_entries): class PCQM_B3LYP(PCQM_PM6): __name__ = "pubchemqc_b3lyp" __energy_methods__ = ["b3lyp"] - energy_target_names = ["b3lyp"] - - def __init__(self, energy_unit=None, distance_unit=None) -> None: - super().__init__(energy_unit=energy_unit, distance_unit=distance_unit) diff --git a/src/openqdc/datasets/spice.py b/src/openqdc/datasets/spice.py index 974d45f..0b90912 100644 --- a/src/openqdc/datasets/spice.py +++ b/src/openqdc/datasets/spice.py @@ -13,7 +13,7 @@ def read_record(r): smiles = r["smiles"].asstr()[0] subset = r["subset"][0].decode("utf-8") n_confs = r["conformations"].shape[0] - x = get_atomic_number_and_charge(dm.to_mol(smiles, add_hs=True)) + x = get_atomic_number_and_charge(dm.to_mol(smiles, remove_hs=False, ordered=True)) positions = r["conformations"][:] res = dict( diff --git a/src/openqdc/utils/io.py b/src/openqdc/utils/io.py index d157ee6..2503031 100644 --- a/src/openqdc/utils/io.py +++ b/src/openqdc/utils/io.py @@ -11,9 +11,12 @@ from rdkit.Chem import MolFromXYZFile gcp_filesys = fsspec.filesystem("gs") +gcp_filesys_public = fsspec.filesystem("https") local_filesys = LocalFileSystem() -_OPENQDC_CACHE_DIR = "~/.cache/openqdc" +_OPENQDC_CACHE_DIR = ( + "~/.cache/openqdc" if "OPENQDC_CACHE_DIR" not in os.environ else os.path.normpath(os.environ["OPENQDC_CACHE_DIR"]) +) def set_cache_dir(d): @@ -26,7 +29,7 @@ def set_cache_dir(d): if d is None: return global _OPENQDC_CACHE_DIR - _OPENQDC_CACHE_DIR = os.path.expanduser(d) + _OPENQDC_CACHE_DIR = os.path.normpath(os.path.expanduser(d)) def get_local_cache() -> str: @@ -41,15 +44,18 @@ def get_local_cache() -> str: return cache_dir -def get_remote_cache() -> str: - remote_cache = "gs://qmdata-public/openqdc" +def get_remote_cache(write_access=False) -> str: + if write_access: + remote_cache = "gs://qmdata-public/openqdc" + else: + remote_cache = "https://storage.googleapis.com/qmdata-public/openqdc" return remote_cache def push_remote(local_path, overwrite=True): - remote_path = local_path.replace(get_local_cache(), get_remote_cache()) + remote_path = local_path.replace(get_local_cache(), get_remote_cache(write_access=overwrite)) gcp_filesys.mkdirs(os.path.dirname(remote_path), exist_ok=False) - # print(f"Pushing {local_path} file to {remote_path}, ({gcp_filesys.exists(os.path.dirname(remote_path))})") + print(f"Pushing {local_path} file to {remote_path}, ({gcp_filesys.exists(os.path.dirname(remote_path))})") if not gcp_filesys.exists(remote_path) or overwrite: gcp_filesys.put_file(local_path, remote_path) return remote_path @@ -60,13 +66,13 @@ def pull_locally(local_path, overwrite=False): os.makedirs(os.path.dirname(local_path), exist_ok=True) if not os.path.exists(local_path) or overwrite: # print(f"Pulling {remote_path} file to {local_path}") - gcp_filesys.get_file(remote_path, local_path) + gcp_filesys_public.get_file(remote_path, local_path) return local_path def copy_exists(local_path): remote_path = local_path.replace(get_local_cache(), get_remote_cache()) - return os.path.exists(local_path) or gcp_filesys.exists(remote_path) + return os.path.exists(local_path) or gcp_filesys_public.exists(remote_path) def makedirs_gcs(path, exist_ok=True):