2024

Johnson, Holly M.; Gusev, Filipp; Dull, Jordan T.; Seo, Yejoon; Priestley, Rodney D.; Isayev, Olexandr; Rand, Barry P.
Discovery of Crystallizable Organic Semiconductors with Machine Learning [Journal Article]
In: J. Am. Chem. Soc., vol. 146, no. 31, pp. 21583–21590, 2024, ISSN: 1520-5126.
Abstract | Links | BibTeX | Tags: Active learning, Crystal structure
% Journal article: Johnson et al., J. Am. Chem. Soc. 146(31), pp. 21583--21590, 2024.
% Authors use the "Last, First" form; fields are grouped as
% who / what / where / when / identifiers / annotations.
@article{Johnson2024,
  author    = {Johnson, Holly M. and Gusev, Filipp and Dull, Jordan T. and Seo, Yejoon and Priestley, Rodney D. and Isayev, Olexandr and Rand, Barry P.},
  title     = {Discovery of Crystallizable Organic Semiconductors with Machine Learning},
  journal   = {J. Am. Chem. Soc.},
  volume    = {146},
  number    = {31},
  pages     = {21583--21590},
  publisher = {American Chemical Society (ACS)},
  year      = {2024},
  date      = {2024-08-07},
  issn      = {1520-5126},
  doi       = {10.1021/jacs.4c05245},
  url       = {https://olexandrisayev.com/wp-content/uploads/johnson-et-al-2024-discovery-of-crystallizable-organic-semiconductors-with-machine-learning-1.pdf},
  urldate   = {2024-08-07},
  keywords  = {Active learning, Crystal structure},
  abstract  = {Crystalline organic semiconductors are known to have improved charge carrier mobility and exciton diffusion length in comparison to their amorphous counterparts. Certain organic molecular thin films can be transitioned from initially prepared amorphous layers to large-scale crystalline films via abrupt thermal annealing. Ideally, these films crystallize as platelets with long-range-ordered domains on the scale of tens to hundreds of microns. However, other organic molecular thin films may instead crystallize as spherulites or resist crystallization entirely. Organic molecules that have the capability of transforming into a platelet morphology feature both high melting point (Tm) and crystallization driving force (ΔGc). In this work, we employed machine learning (ML) to identify candidate organic materials with the potential to crystallize into platelets by estimating the aforementioned thermal properties. Six organic molecules identified by the ML algorithm were experimentally evaluated; three crystallized as platelets, one crystallized as a spherulite, and two resisted thin film crystallization. These results demonstrate a successful application of ML in the scope of predicting thermal properties of organic molecules and reinforce the principles of Tm and ΔGc as metrics that aid in predicting the crystallization behavior of organic thin films.},
  pubstate  = {published},
  tppubtype = {article}
}

Zhang, Shuhao; Makoś, Małgorzata Z.; Jadrich, Ryan B.; Kraka, Elfi; Barros, Kipton; Nebgen, Benjamin T.; Tretiak, Sergei; Isayev, Olexandr; Lubbers, Nicholas; Messerly, Richard A.; Smith, Justin S.
Exploring the frontiers of condensed-phase chemistry with a general reactive machine learning potential [Journal Article]
In: Nat. Chem., 2024.
Abstract | Links | BibTeX | Tags: Active learning, ANI, Organic reactions
% Journal article: Zhang et al., Nat. Chem., 2024.
% Accented letters in the author list are written as brace-delimited special
% characters so that the control word \l is terminated before the following
% letters (otherwise TeX reads one undefined control word) and so that BibTeX
% sorts and labels the names correctly.
% NOTE(review): volume, number and pages are not present in this record;
% confirm them against the DOI before relying on this entry in a print style.
@article{Zhang2024,
  author    = {Zhang, Shuhao and Mako{\'s}, Ma{\l}gorzata Z. and Jadrich, Ryan B. and Kraka, Elfi and Barros, Kipton and Nebgen, Benjamin T. and Tretiak, Sergei and Isayev, Olexandr and Lubbers, Nicholas and Messerly, Richard A. and Smith, Justin S.},
  title     = {Exploring the frontiers of condensed-phase chemistry with a general reactive machine learning potential},
  journal   = {Nat. Chem.},
  publisher = {Springer Science and Business Media LLC},
  year      = {2024},
  date      = {2024-03-07},
  doi       = {10.1038/s41557-023-01427-3},
  urldate   = {2024-03-07},
  keywords  = {Active learning, ANI, Organic reactions},
  abstract  = {Atomistic simulation has a broad range of applications from drug design to materials discovery. Machine learning interatomic potentials (MLIPs) have become an efficient alternative to computationally expensive ab initio simulations. For this reason, chemistry and materials science would greatly benefit from a general reactive MLIP, that is, an MLIP that is applicable to a broad range of reactive chemistry without the need for refitting. Here we develop a general reactive MLIP (ANI-1xnr) through automated sampling of condensed-phase reactions. ANI-1xnr is then applied to study five distinct systems: carbon solid-phase nucleation, graphene ring formation from acetylene, biofuel additives, combustion of methane and the spontaneous formation of glycine from early earth small molecules. In all studies, ANI-1xnr closely matches experiment (when available) and/or previous studies using traditional model chemistry methods. As such, ANI-1xnr proves to be a highly general reactive MLIP for C, H, N and O elements in the condensed phase, enabling high-throughput in silico reactive chemistry experimentation.},
  pubstate  = {published},
  tppubtype = {article}
}

Sattari, Kianoosh; Li, Dawei; Kalita, Bhupalee; Xie, Yunchao; Lighvan, Fatemeh Barmaleki; Isayev, Olexandr; Lin, Jian
De novo molecule design towards biased properties via a deep generative framework and iterative transfer learning [Journal Article]
In: Digital Discovery, vol. 3, no. 2, pp. 410–421, 2024.
Abstract | Links | BibTeX | Tags: Active learning, Generative AI
% Journal article: Sattari et al., Digital Discovery 3(2), pp. 410--421, 2024.
% In the abstract, en dashes are written as "--" rather than as a dash macro
% glued to the next word (which TeX would read as one undefined control word),
% percent signs are escaped so they do not start a TeX comment, and the
% less-than sign is set in math mode.
@article{Sattari2024,
  author    = {Sattari, Kianoosh and Li, Dawei and Kalita, Bhupalee and Xie, Yunchao and Lighvan, Fatemeh Barmaleki and Isayev, Olexandr and Lin, Jian},
  title     = {\textit{De novo} molecule design towards biased properties \textit{via} a deep generative framework and iterative transfer learning},
  journal   = {Digital Discovery},
  volume    = {3},
  number    = {2},
  pages     = {410--421},
  publisher = {Royal Society of Chemistry (RSC)},
  year      = {2024},
  date      = {2024-02-14},
  doi       = {10.1039/d3dd00210a},
  urldate   = {2024-02-14},
  keywords  = {Active learning, Generative AI},
  abstract  = {De novo design of molecules with targeted properties represents a new frontier in molecule development. Despite enormous progress, two main challenges remain: (i) generating novel molecules conditioned on targeted, continuous property values; (ii) obtaining molecules with property values beyond the range in the training data. To tackle these challenges, we propose a reinforced regressional and conditional generative adversarial network (RRCGAN) to generate chemically valid molecules with targeted HOMO--LUMO energy gap (ΔEH--L) as a proof-of-concept study. As validated by density functional theory (DFT) calculation, 75\% of the generated molecules have a relative error (RE) of $<$20\% of the targeted ΔEH--L values. To bias the generation toward the ΔEH--L values beyond the range of the original training molecules, transfer learning was applied to iteratively retrain the RRCGAN model. After just two iterations, the mean ΔEH--L of the generated molecules increases to 8.7 eV from the mean value of 5.9 eV shown in the initial training dataset. Qualitative and quantitative analyses reveal that the model has successfully captured the underlying structure--property relationship, which agrees well with the established physical and chemical rules. These results present a trustworthy, purely data-driven methodology for the highly efficient generation of novel molecules with different targeted properties.},
  pubstate  = {published},
  tppubtype = {article}
}
2023

Gusev, Filipp; Gutkin, Evgeny; Kurnikova, Maria G.; Isayev, Olexandr
Active Learning Guided Drug Design Lead Optimization Based on Relative Binding Free Energy Modeling [Journal Article]
In: J. Chem. Inf. Model., vol. 63, no. 2, pp. 583–594, 2023.
Abstract | Links | BibTeX | Tags: Active learning, Drug Discovery
% Journal article: Gusev et al., J. Chem. Inf. Model. 63(2), pp. 583--594, 2023.
% In the abstract, the en dash is written as "--" rather than as a dash macro
% glued to the next word (which TeX would read as one undefined control word),
% and the accented i is a brace-delimited special character with a dotless i.
@article{Gusev2023,
  author    = {Gusev, Filipp and Gutkin, Evgeny and Kurnikova, Maria G. and Isayev, Olexandr},
  title     = {Active Learning Guided Drug Design Lead Optimization Based on Relative Binding Free Energy Modeling},
  journal   = {J. Chem. Inf. Model.},
  volume    = {63},
  number    = {2},
  pages     = {583--594},
  publisher = {American Chemical Society (ACS)},
  year      = {2023},
  date      = {2023-01-23},
  doi       = {10.1021/acs.jcim.2c01052},
  urldate   = {2023-01-23},
  keywords  = {Active learning, Drug Discovery},
  abstract  = {In silico identification of potent protein inhibitors commonly requires prediction of a ligand binding free energy (BFE). Thermodynamics integration (TI) based on molecular dynamics (MD) simulations is a BFE calculation method capable of acquiring accurate BFE, but it is computationally expensive and time-consuming. In this work, we have developed an efficient automated workflow for identifying compounds with the lowest BFE among thousands of congeneric ligands, which requires only hundreds of TI calculations. Automated machine learning (AutoML) orchestrated by active learning (AL) in an AL--AutoML workflow allows unbiased and efficient search for a small set of best-performing molecules. We have applied this workflow to select inhibitors of the SARS-CoV-2 papain-like protease and were able to find 133 compounds with improved binding affinity, including 16 compounds with better than 100-fold binding affinity improvement. We obtained a hit rate that outperforms that expected of traditional expert medicinal chemist-guided campaigns. Thus, we demonstrate that the combination of AL and AutoML with free energy simulations provides at least 20× speedup relative to the na{\"\i}ve brute force approaches.},
  pubstate  = {published},
  tppubtype = {article}
}