You are currently on a failover version of the Materials Cloud Archive hosted at CINECA, Italy.
Click here to access the main Materials Cloud Archive.
Note: If the link above redirects you to this page, it means that the Archive is currently offline due to maintenance. We will be back online as soon as possible.
This version is read-only: you can view published records and download files, but you cannot create new records or make changes to existing ones.

Electronic excited states from physically-constrained machine learning


JSON Export

{
  "metadata": {
    "edited_by": 576, 
    "owner": 219, 
    "_oai": {
      "id": "oai:materialscloud.org:2094"
    }, 
    "description": "Data-driven techniques are increasingly used to replace electronic-structure calculations of matter. In this context, a relevant question is whether machine learning (ML) should be applied directly to predict the desired properties or be combined explicitly with physically-grounded operations. We present an example of an integrated modeling approach, in which a symmetry-adapted ML model of an effective Hamiltonian is trained to reproduce electronic excitations from a quantum-mechanical calculation. The resulting model can make predictions for molecules that are much larger and more complex than those that it is trained on, and allows for dramatic computational savings by indirectly targeting the outputs of well-converged calculations while using a parameterization corresponding to a minimal atom-centered basis. Our results on a comprehensive dataset of hydrocarbons emphasize the merits of intertwining data-driven techniques with physical approximations, improving the transferability and interpretability of ML models without affecting their accuracy and computational efficiency, and providing a blueprint for developing ML-augmented electronic-structure methods.\nHere we include the dataset, accompanying the paper linked below, of hydrocarbons including ethane, ethene, butadiene, hexane, hexatriene, isoprene, styrene, polyalkenes (dodecahexaene, tetradecaheptaene, hexadecaoctaene, octadecanonaene, eicosadecaene), aromatics (benzene, azulene, naphthalene, biphenyl), anthracene, beta-carotene, fullerene. We also provide scripts to generate the Fock and overlap matrices in this dataset. The code for machine learning can be found at the Software reference below.", 
    "mcid": "2024.34", 
    "id": "2094", 
    "license": "Creative Commons Attribution 4.0 International", 
    "license_addendum": null, 
    "references": [
      {
        "citation": "E. Cignoni, D. Suman, J. Nigam, L. Cupellini, B. Mennucci, and M. Ceriotti, arXiv preprint arXiv:2311.00844.", 
        "type": "Preprint", 
        "url": "https://arxiv.org/abs/2311.00844", 
        "comment": "Preprint in which the data is described"
      }, 
      {
        "citation": "E. Cignoni, Hamiltonian learning for excited states (HaLEx)", 
        "type": "Software", 
        "url": "https://github.com/ecignoni/halex/", 
        "comment": "Github repository with the code for generating data and machine learning"
      }
    ], 
    "doi": "10.24435/materialscloud:j2-58", 
    "keywords": [
      "ERC", 
      "hamiltonian", 
      "excited states", 
      "machine learning", 
      "EPFL", 
      "FIAMMA", 
      "LIFETimeS"
    ], 
    "contributors": [
      {
        "affiliations": [
          "Dipartimento di Chimica e Chimica Industriale, Universit\u00e0 di Pisa, Pisa, Italy"
        ], 
        "familyname": "Cignoni", 
        "email": "edoardo.cignoni@phd.unipi.it", 
        "givennames": "Edoardo"
      }, 
      {
        "affiliations": [
          "Laboratory of Computational Science and Modeling, IMX, \u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne, 1015 Lausanne, Switzerland"
        ], 
        "familyname": "Suman", 
        "givennames": "Divya"
      }, 
      {
        "affiliations": [
          "Laboratory of Computational Science and Modeling, IMX, \u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne, 1015 Lausanne, Switzerland"
        ], 
        "familyname": "Nigam", 
        "email": "jigyasa.nigam@epfl.ch", 
        "givennames": "Jigyasa"
      }, 
      {
        "affiliations": [
          "Dipartimento di Chimica e Chimica Industriale, Universit\u00e0 di Pisa, Pisa, Italy"
        ], 
        "familyname": "Cupellini", 
        "givennames": "Lorenzo"
      }, 
      {
        "affiliations": [
          "Dipartimento di Chimica e Chimica Industriale, Universit\u00e0 di Pisa, Pisa, Italy"
        ], 
        "familyname": "Mennucci", 
        "givennames": "Benedetta"
      }, 
      {
        "affiliations": [
          "Division of Chemistry and Chemical Engineering, California Institute of Technology, Pasadena, CA, USA", 
          "Laboratory of Computational Science and Modeling, IMX, \u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne, 1015 Lausanne, Switzerland"
        ], 
        "familyname": "Ceriotti", 
        "givennames": "Michele"
      }
    ], 
    "conceptrecid": "2054", 
    "version": 2, 
    "publication_date": "Feb 20, 2024, 14:55:46", 
    "is_last": true, 
    "status": "published", 
    "_files": [
      {
        "size": 3567, 
        "checksum": "md5:42c4dda53ca3a875a4a3fd66bd41d461", 
        "description": "README describing the repository architecture and data", 
        "key": "README.md"
      }, 
      {
        "size": 4471422836, 
        "checksum": "md5:ad20d9c9ca8109ac24f845bb383162f9", 
        "description": "Dataset of hydrocarbons of varying conjugation, lengths and aromaticity along with scripts to run calculations and produce figures", 
        "key": "dataset.tar.gz"
      }
    ], 
    "title": "Electronic excited states from physically-constrained machine learning"
  }, 
  "id": "2094", 
  "updated": "2024-02-20T13:55:46.970570+00:00", 
  "created": "2024-02-20T12:00:57.637265+00:00", 
  "revision": 2
}