2023

Zhao, Qiyuan; Anstine, Dylan M.; Isayev, Olexandr; Savoie, Brett M.
Δ² machine learning for reaction property prediction Journal Article
In: Chem. Sci., vol. 14, no. 46, pp. 13392–13401, 2023.
Abstract | Links | BibTeX | Tags: AIMNet, Machine learning potential, Organic reactions
% Zhao, Anstine, Isayev, Savoie (2023), Chem. Sci. 14(46), 13392--13401.
% The Delta-squared symbol is written in braced math mode so the title compiles
% and keeps its case; the percent sign in the abstract is escaped for LaTeX.
% date/urldate/pubstate are biblatex fields; year is kept for classic BibTeX
% styles; tppubtype is a non-standard field that styles ignore.
@article{Zhao2023b,
  title      = {{$\Delta^{2}$} machine learning for reaction property prediction},
  author     = {Qiyuan Zhao and Dylan M. Anstine and Olexandr Isayev and Brett M. Savoie},
  doi        = {10.1039/d3sc02408c},
  year       = {2023},
  date       = {2023-11-29},
  urldate    = {2023-11-29},
  journal    = {Chem. Sci.},
  volume     = {14},
  number     = {46},
  pages      = {13392--13401},
  publisher  = {Royal Society of Chemistry (RSC)},
  abstract   = {The emergence of $\Delta$-learning models, whereby machine learning (ML) is used to predict a correction to a low-level energy calculation, provides a versatile route to accelerate high-level energy evaluations at a given geometry. However, $\Delta$-learning models are inapplicable to reaction properties like heats of reaction and activation energies that require both a high-level geometry and energy evaluation. Here, a $\Delta^{2}$-learning model is introduced that can predict high-level activation energies based on low-level critical-point geometries. The $\Delta^{2}$ model uses an atom-wise featurization typical of contemporary ML interatomic potentials (MLIPs) and is trained on a dataset of $\sim$167 000 reactions, using the GFN2-xTB energy and critical-point geometry as a low-level input and the B3LYP-D3/TZVP energy calculated at the B3LYP-D3/TZVP critical point as a high-level target. The excellent performance of the $\Delta^{2}$ model on unseen reactions demonstrates the surprising ease with which the model implicitly learns the geometric deviations between the low-level and high-level geometries that condition the activation energy prediction. The transferability of the $\Delta^{2}$ model is validated on several external testing sets where it shows near chemical accuracy, illustrating the benefits of combining ML models with readily available physical-based information from semi-empirical quantum chemistry calculations. Fine-tuning of the $\Delta^{2}$ model on a small number of Gaussian-4 calculations produced a 35\% accuracy improvement over DFT activation energy predictions while retaining xTB-level cost. The $\Delta^{2}$ model approach proves to be an efficient strategy for accelerating chemical reaction characterization with minimal sacrifice in prediction accuracy.},
  keywords   = {AIMNet, Machine learning potential, Organic reactions},
  pubstate   = {published},
  tppubtype  = {article}
}

Liu, Zhen; Moroz, Yurii S.; Isayev, Olexandr
The challenge of balancing model sensitivity and robustness in predicting yields: a benchmarking study of amide coupling reactions Journal Article
In: Chem. Sci., vol. 14, no. 39, pp. 10835–10846, 2023.
Abstract | Links | BibTeX | Tags: AIMNet, Organic reactions
% Liu, Moroz, Isayev (2023), Chem. Sci. 14(39), 10835--10846.
% date/urldate/pubstate are biblatex fields; year is kept for classic BibTeX
% styles; tppubtype is a non-standard field that styles ignore.
@article{Liu2023,
  author     = {Zhen Liu and Yurii S. Moroz and Olexandr Isayev},
  title      = {The challenge of balancing model sensitivity and robustness in predicting yields: a benchmarking study of amide coupling reactions},
  journal    = {Chem. Sci.},
  volume     = 14,
  number     = 39,
  pages      = {10835--10846},
  year       = 2023,
  date       = {2023-10-11},
  urldate    = {2023-10-11},
  publisher  = {Royal Society of Chemistry (RSC)},
  doi        = {10.1039/d3sc03902a},
  keywords   = {AIMNet, Organic reactions},
  abstract   = {Accurate prediction of reaction yield is the holy grail for computer-assisted synthesis prediction, but current models have failed to generalize to large literature datasets. To understand the causes and inspire future design, we systematically benchmarked the yield prediction task. We carefully curated and augmented a literature dataset of 41 239 amide coupling reactions, each with information on reactants, products, intermediates, yields, and reaction contexts, and provided 3D structures for the molecules. We calculated molecular features related to 2D and 3D structure information, as well as physical and electronic properties. These descriptors were paired with 4 categories of machine learning methods (linear, kernel, ensemble, and neural network), yielding valuable benchmarks about feature and model performance. Despite the excellent performance on a high-throughput experiment (HTE) dataset (R2 around 0.9), no method gave satisfactory results on the literature data. The best performance was an R2 of 0.395 ± 0.020 using the stack technique. Error analysis revealed that reactivity cliff and yield uncertainty are among the main reasons for incorrect predictions. Removing reactivity cliffs and uncertain reactions boosted the R2 to 0.457 ± 0.006. These results highlight that yield prediction models must be sensitive to the reactivity change due to the subtle structure variance, as well as be robust to the uncertainty associated with yield measurements.},
  pubstate   = {published},
  tppubtype  = {article}
}

Anstine, Dylan M.; Isayev, Olexandr
Machine Learning Interatomic Potentials and Long-Range Physics Journal Article
In: J. Phys. Chem. A, vol. 127, no. 11, pp. 2417–2431, 2023, ISSN: 1520-5215.
Abstract | Links | BibTeX | Tags: AIMNet, ANI, Machine learning potential, Review
% Anstine, Isayev (2023), J. Phys. Chem. A 127(11), 2417--2431.
% date/urldate/pubstate are biblatex fields; year is kept for classic BibTeX
% styles; tppubtype is a non-standard field that styles ignore.
@article{Anstine2023,
  author     = {Dylan M. Anstine and Olexandr Isayev},
  title      = {Machine Learning Interatomic Potentials and Long-Range Physics},
  journal    = {J. Phys. Chem. A},
  volume     = 127,
  number     = 11,
  pages      = {2417--2431},
  year       = 2023,
  date       = {2023-03-23},
  urldate    = {2023-03-23},
  publisher  = {American Chemical Society (ACS)},
  doi        = {10.1021/acs.jpca.2c06778},
  issn       = {1520-5215},
  keywords   = {AIMNet, ANI, Machine learning potential, Review},
  abstract   = {Advances in machine learned interatomic potentials (MLIPs), such as those using neural networks, have resulted in short-range models that can infer interaction energies with near ab initio accuracy and orders of magnitude reduced computational cost. For many atom systems, including macromolecules, biomolecules, and condensed matter, model accuracy can become reliant on the description of short- and long-range physical interactions. The latter terms can be difficult to incorporate into an MLIP framework. Recent research has produced numerous models with considerations for nonlocal electrostatic and dispersion interactions, leading to a large range of applications that can be addressed using MLIPs. In light of this, we present a Perspective focused on key methodologies and models being used where the presence of nonlocal physics and chemistry are crucial for describing system properties. The strategies covered include MLIPs augmented with dispersion corrections, electrostatics calculated with charges predicted from atomic environment descriptors, the use of self-consistency and message passing iterations to propagated nonlocal system information, and charges obtained via equilibration schemes. We aim to provide a pointed discussion to support the development of machine learning-based interatomic potentials for systems where contributions from only nearsighted terms are deficient.},
  pubstate   = {published},
  tppubtype  = {article}
}
2021

Zubatyuk, Roman; Smith, Justin S.; Nebgen, Benjamin T.; Tretiak, Sergei; Isayev, Olexandr
Teaching a neural network to attach and detach electrons from molecules Journal Article
In: Nat Commun, vol. 12, no. 1, 2021, ISSN: 2041-1723.
Abstract | Links | BibTeX | Tags: AIMNet, Machine learning potential
% Zubatyuk, Smith, Nebgen, Tretiak, Isayev (2021), Nat Commun 12(1).
% In the abstract the approximately sign is written as math-mode \sim: a bare
% tilde is a non-breaking space in LaTeX and would vanish from the output.
% No pages field is present in this record.
% NOTE(review): the journal may identify this paper by an article number - confirm and add it.
% date/pubstate are biblatex fields; year is kept for classic BibTeX styles;
% tppubtype is a non-standard field that styles ignore.
@article{Zubatyuk2021,
  title      = {Teaching a neural network to attach and detach electrons from molecules},
  author     = {Roman Zubatyuk and Justin S. Smith and Benjamin T. Nebgen and Sergei Tretiak and Olexandr Isayev},
  doi        = {10.1038/s41467-021-24904-0},
  issn       = {2041-1723},
  year       = {2021},
  date       = {2021-08-11},
  journal    = {Nat Commun},
  volume     = {12},
  number     = {1},
  publisher  = {Springer Science and Business Media LLC},
  abstract   = {Interatomic potentials derived with Machine Learning algorithms such as Deep-Neural Networks (DNNs), achieve the accuracy of high-fidelity quantum mechanical (QM) methods in areas traditionally dominated by empirical force fields and allow performing massive simulations. Most DNN potentials were parametrized for neutral molecules or closed-shell ions due to architectural limitations. In this work, we propose an improved machine learning framework for simulating open-shell anions and cations. We introduce the AIMNet-NSE (Neural Spin Equilibration) architecture, which can predict molecular energies for an arbitrary combination of molecular charge and spin multiplicity with errors of about 2\textendash3 kcal/mol and spin-charges with errors $\sim$0.01e for small and medium-sized organic molecules, compared to the reference QM simulations. The AIMNet-NSE model allows to fully bypass QM calculations and derive the ionization potential, electron affinity, and conceptual Density Functional Theory quantities like electronegativity, hardness, and condensed Fukui functions. We show that these descriptors, along with learned atomic representations, could be used to model chemical reactivity through an example of regioselectivity in electrophilic aromatic substitution reactions.},
  keywords   = {AIMNet, Machine learning potential},
  pubstate   = {published},
  tppubtype  = {article}
}

Zubatiuk, Tetiana; Isayev, Olexandr
Development of Multimodal Machine Learning Potentials: Toward a Physics-Aware Artificial Intelligence Journal Article
In: Acc. Chem. Res., vol. 54, no. 7, pp. 1575–1585, 2021, ISSN: 1520-4898.
Links | BibTeX | Tags: AIMNet, Machine learning potential
% Zubatiuk, Isayev (2021), Acc. Chem. Res. 54(7), 1575--1585.
% date/urldate/pubstate are biblatex fields; year is kept for classic BibTeX
% styles; tppubtype is a non-standard field that styles ignore.
@article{Zubatiuk2021,
  author     = {Tetiana Zubatiuk and Olexandr Isayev},
  title      = {Development of Multimodal Machine Learning Potentials: Toward a Physics-Aware Artificial Intelligence},
  journal    = {Acc. Chem. Res.},
  volume     = 54,
  number     = 7,
  pages      = {1575--1585},
  year       = 2021,
  date       = {2021-04-06},
  urldate    = {2021-04-06},
  publisher  = {American Chemical Society (ACS)},
  doi        = {10.1021/acs.accounts.0c00868},
  issn       = {1520-4898},
  keywords   = {AIMNet, Machine learning potential},
  pubstate   = {published},
  tppubtype  = {article}
}