
Analysis of bootstrap and subsampling in high-dimensional regularized regression (code)


JSON Export

{
  "metadata": {
    "edited_by": 576, 
    "references": [
      {
        "type": "Journal reference", 
        "citation": "L. Clart\u00e9, A. Vandenbroucque, G. Dalle, B. Loureiro, F. Krzakala, L. Zdeborov\u00e1, Proceedings of the Fortieth Conference on Uncertainty in Artificial Intelligence, PMLR 244, 787-819 (2024)", 
        "url": "https://proceedings.mlr.press/v244/clarte24a.html"
      }
    ], 
    "license_addendum": null, 
    "status": "published", 
    "is_last": true, 
    "contributors": [
      {
        "email": "lucas.clarte@epfl.ch", 
        "givennames": "Lucas", 
        "familyname": "Clarte", 
        "affiliations": [
          "\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne (EPFL), Statistical Physics of Computation laboratory, CH-1015 Lausanne, Switzerland"
        ]
      }, 
      {
        "affiliations": [
          "\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne (EPFL), Statistical Physics of Computation laboratory, CH-1015 Lausanne, Switzerland", 
          "\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne (EPFL), Information, Learning and Physics laboratory, CH-1015 Lausanne, Switzerland"
        ], 
        "givennames": "Adrien", 
        "familyname": "Vandenbroucque"
      }, 
      {
        "affiliations": [
          "\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne (EPFL), Information and Network Dynamics laboratory, CH-1015 Lausanne, Switzerland", 
          "\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne (EPFL), Statistical Physics of Computation laboratory, CH-1015 Lausanne, Switzerland", 
          "\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne (EPFL), Information, Learning and Physics laboratory, CH-1015 Lausanne, Switzerland"
        ], 
        "givennames": "Guillaume", 
        "familyname": "Dalle"
      }, 
      {
        "affiliations": [
          "D\u00e9partement d\u2019Informatique, \u00c9cole Normale Sup\u00e9rieure - PSL & CNRS, Paris, France"
        ], 
        "givennames": "Bruno", 
        "familyname": "Loureiro"
      }, 
      {
        "affiliations": [
          "\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne (EPFL), Information, Learning and Physics laboratory, CH-1015 Lausanne, Switzerland"
        ], 
        "givennames": "Florent", 
        "familyname": "Krzakala"
      }, 
      {
        "affiliations": [
          "\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne (EPFL), Statistical Physics of Computation laboratory, CH-1015 Lausanne, Switzerland"
        ], 
        "givennames": "Lenka", 
        "familyname": "Zdeborova"
      }
    ], 
    "doi": "10.24435/materialscloud:az-j9", 
    "conceptrecid": "2333", 
    "owner": 1130, 
    "_oai": {
      "id": "oai:materialscloud.org:2334"
    }, 
    "version": 1, 
    "keywords": [
      "MARVEL/P2", 
      "uncertainty quantification", 
      "neural networks", 
      "numerical simulation"
    ], 
    "description": "We investigate popular resampling methods for estimating the uncertainty of statistical models, such as subsampling, bootstrap and the jackknife, and their performance in high-dimensional supervised regression tasks. We provide a tight asymptotic description of the biases and variances estimated by these methods in the context of generalized linear models, such as ridge and logistic regression, taking the limit where the number of samples n and dimension d of the covariates grow at a comparable fixed rate \u03b1 = n/d. Our findings are three-fold: i) resampling methods are fraught with problems in high dimensions and exhibit the double-descent-like behavior typical of these situations; ii) only when \u03b1 is large enough do they provide consistent and reliable error estimations (we give convergence rates); iii) in the over-parametrized regime \u03b1 < 1 relevant to modern machine learning practice, their predictions are not consistent, even with optimal regularization.\nThis record provides the code to reproduce the numerical experiments of the related paper \"Analysis of bootstrap and subsampling in high-dimensional regularized regression\".", 
    "_files": [
      {
        "size": 979959, 
        "checksum": "md5:9ecf4b0632902209f673b53919ac1512", 
        "description": "Compressed files contained in the repository https://github.com/spoc-group/BootstrapAsymptotics", 
        "key": "BootstrapAsymptotics-main.zip"
      }, 
      {
        "size": 500, 
        "checksum": "md5:98c73ff79efc66b38ed648aad8eef65e", 
        "description": "README file describing the structure of the code", 
        "key": "README.txt"
      }
    ], 
    "publication_date": "Jan 30, 2025, 17:09:52", 
    "license": "Creative Commons Attribution 4.0 International", 
    "title": "Analysis of bootstrap and subsampling in high-dimensional regularized regression (code)", 
    "mcid": "2025.25", 
    "id": "2334"
  }, 
  "revision": 4, 
  "created": "2024-09-17T15:42:40.404698+00:00", 
  "id": "2334", 
  "updated": "2025-01-30T16:09:52.248580+00:00"
}
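
For readers who want a concrete feel for the resampling procedure summarized in the description field, below is a minimal, illustrative Python sketch of a pairs bootstrap for ridge regression in the proportional regime alpha = n/d. This is not the code distributed in BootstrapAsymptotics-main.zip (the repository linked in the file description contains the actual experiments); the synthetic Gaussian data model, the regularization convention, and the helper names ridge_fit and bootstrap_variance are assumptions made for this example only.

# Illustrative sketch (not the authors' code): pairs-bootstrap variance of a
# ridge estimator on synthetic data in the proportional regime alpha = n/d.
import numpy as np

rng = np.random.default_rng(0)

def ridge_fit(X, y, lam):
    """Closed-form ridge estimator: solve (X^T X + lam I) w = X^T y."""
    d = X.shape[1]
    return np.linalg.solve(X.T @ X + lam * np.eye(d), X.T @ y)

def bootstrap_variance(X, y, lam, n_boot=200):
    """Resample (x_i, y_i) pairs with replacement, refit ridge each time,
    and return the per-coordinate variance of the bootstrap estimates."""
    n = X.shape[0]
    fits = []
    for _ in range(n_boot):
        idx = rng.integers(0, n, size=n)   # bootstrap sample of size n
        fits.append(ridge_fit(X[idx], y[idx], lam))
    return np.var(np.stack(fits), axis=0)

# Synthetic Gaussian design, teacher y = X w* / sqrt(d) + noise, at a chosen alpha = n/d.
d, alpha, lam, noise = 200, 2.0, 0.1, 0.5
n = int(alpha * d)
w_star = rng.standard_normal(d)
X = rng.standard_normal((n, d))
y = X @ w_star / np.sqrt(d) + noise * rng.standard_normal(n)

var_boot = bootstrap_variance(X, y, lam)
print(f"alpha = {alpha:.1f}, mean bootstrap variance = {var_boot.mean():.4f}")

Sweeping alpha toward and below 1 in this toy setup is one way to observe the kind of instability in resampling-based uncertainty estimates that the associated paper characterizes asymptotically.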