% Bibliography database: UK Biobank genetics / sparse-regression papers.
% Conventions used below: one field per line, `month` as an unquoted
% three-letter macro, page ranges with `--`, titles without a trailing period
% (the bibliography style supplies punctuation), and braces around words whose
% capitalisation must survive sentence-casing.
% Citation keys are the original numeric IDs and are kept unchanged so that
% existing citations continue to resolve.

% Journal article: genetics of 35 blood and urine biomarkers in the UK Biobank (2021).
@article{114,
  author   = {Sinnott-Armstrong, Nasa and Tanigawa, Yosuke and Amar, David and Mars, Nina and Benner, Christian and Aguirre, Matthew and Venkataraman, Guhan Ram and Wainberg, Michael and Ollila, Hanna M and Kiiskinen, Tuomo and Havulinna, Aki S and Pirruccello, James P and Qian, Junyang and Shcherbina, Anna and Rodriguez, Fatima and Assimes, Themistocles L and Agarwala, Vineeta and Tibshirani, Robert and Hastie, Trevor and Ripatti, Samuli and Pritchard, Jonathan K and Daly, Mark J and Rivas, Manuel A},
  title    = {Genetics of 35 blood and urine biomarkers in the {UK} {Biobank}},
  journal  = {Nature Genetics},
  volume   = {53},
  pages    = {185--194},
  year     = {2021},
  month    = feb,
  issn     = {1546-1718},
  doi      = {10.1038/s41588-020-00757-z},
  keywords = {Biological Specimen Banks, Biomarkers, Cardiovascular Diseases, Diabetes Mellitus, Type 2, DNA Copy Number Variations, Genetic Pleiotropy, HLA Antigens, Humans, Linkage Disequilibrium, Liver-Specific Organic Anion Transporter 1, Mendelian Randomization Analysis, Polymorphism, Single Nucleotide, Proteins, Renal Insufficiency, Chronic, Serine Endopeptidases, United Kingdom},
  abstract = {Clinical laboratory tests are a critical component of the continuum of care. We evaluate the genetic basis of 35 blood and urine laboratory measurements in the UK Biobank (n = 363,228 individuals). We identify 1,857 loci associated with at least one trait, containing 3,374 fine-mapped associations and additional sets of large-effect ($>$0.1 s.d.) protein-altering, human leukocyte antigen (HLA) and copy number variant (CNV) associations. Through Mendelian randomization (MR) analysis, we discover 51 causal relationships, including previously known agonistic effects of urate on gout and cystatin C on stroke. Finally, we develop polygenic risk scores (PRSs) for each biomarker and build {\textquoteleft}multi-PRS{\textquoteright} models for diseases using 35 PRSs simultaneously, which improved chronic kidney disease, type 2 diabetes, gout and alcoholic cirrhosis genetic risk stratification in an independent dataset (FinnGen; n = 135,500) relative to single-disease PRSs. Together, our results delineate the genetic basis of biomarkers and their causal influences on diseases and improve genetic risk stratification for common diseases.},
}

% Journal article: sparse-group regularized multi-response Cox regression (2021).
% No volume/pages are recorded in this export; only the dated online record.
@article{137,
  author   = {Li, Ruilin and Tanigawa, Yosuke and Justesen, Johanne M and Taylor, Jonathan and Hastie, Trevor and Tibshirani, Robert and Rivas, Manuel A},
  title    = {Survival Analysis on Rare Events Using Group-Regularized Multi-Response {Cox} Regression},
  journal  = {Bioinformatics},
  year     = {2021},
  month    = feb,
  day      = {9},
  issn     = {1367-4811},
  doi      = {10.1093/bioinformatics/btab095},
  abstract = {MOTIVATION: The prediction performance of Cox proportional hazard model suffers when there are only few uncensored events in the training data.

RESULTS: We propose a Sparse-Group regularized Cox regression method to improve the prediction performance of large-scale and high-dimensional survival data with few observed events. Our approach is applicable when there is one or more other survival responses that 1. has a large number of observed events; 2. share a common set of associated predictors with the rare event response. This scenario is common in the UK Biobank (Sudlow et al., 2015) dataset where records for a large number of common and less prevalent diseases of the same set of individuals are available. By analyzing these responses together, we hope to achieve higher prediction performance than when they are analyzed individually. To make this approach practical for large-scale data, we developed an accelerated proximal gradient optimization algorithm as well as a screening procedure inspired by Qian et al. (2020).

AVAILABILITY: https://github.com/rivas-lab/multisnpnet-Cox.

SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.},
}

% Journal article: the BASIL / snpnet large-scale lasso framework (2020).
% `pages` holds the article identifier (e1009141) rather than a page range.
@article{121,
  author   = {Qian, Junyang and Tanigawa, Yosuke and Du, Wenfei and Aguirre, Matthew and Chang, Chris and Tibshirani, Robert and Rivas, Manuel A and Hastie, Trevor},
  title    = {A fast and scalable framework for large-scale and ultrahigh-dimensional sparse regression with application to the {UK} {Biobank}},
  journal  = {PLOS Genetics},
  volume   = {16},
  pages    = {e1009141},
  year     = {2020},
  month    = oct,
  issn     = {1553-7404},
  doi      = {10.1371/journal.pgen.1009141},
  keywords = {Algorithms, Asthma, Biological Specimen Banks, Body Height, Body Mass Index, Cholesterol, Cohort Studies, Genetics, Population, Genome-Wide Association Study, Genotype, Humans, Logistic Models, Phenotype, Polymorphism, Single Nucleotide, Proportional Hazards Models, United Kingdom},
  abstract = {The UK Biobank is a very large, prospective population-based cohort study across the United Kingdom. It provides unprecedented opportunities for researchers to investigate the relationship between genotypic information and phenotypes of interest. Multiple regression methods, compared with genome-wide association studies (GWAS), have already been showed to greatly improve the prediction performance for a variety of phenotypes. In the high-dimensional settings, the lasso, since its first proposal in statistics, has been proved to be an effective method for simultaneous variable selection and estimation. However, the large-scale and ultrahigh dimension seen in the UK Biobank pose new challenges for applying the lasso method, as many existing algorithms and their implementations are not scalable to large applications. In this paper, we propose a computational framework called batch screening iterative lasso (BASIL) that can take advantage of any existing lasso solver and easily build a scalable solution for very large data, including those that are larger than the memory size. We introduce snpnet, an R package that implements the proposed algorithm on top of glmnet and optimizes for single nucleotide polymorphism (SNP) datasets. It currently supports l1-penalized linear model, logistic regression, Cox model, and also extends to the elastic net with l1/l2 penalty. We demonstrate results on the UK Biobank dataset, where we achieve competitive predictive performance for all four phenotypes considered (height, body mass index, asthma, high cholesterol) using only a small fraction of the variants compared with other established polygenic risk score methods.},
}

% Journal article: snpnet-Cox, Lasso-penalised Cox model for UK Biobank data (2020).
% No volume/pages are recorded in this export; only the dated online record.
% NOTE(review): the export spelled one author "Qiang, Junyang"; it is normalised
% here to "Qian, Junyang" to agree with entries 114 and 121 and with the
% abstract's own reference to "Qian and others (2019)" -- confirm against the
% publisher record.
@article{119,
  author   = {Li, Ruilin and Chang, Christopher and Justesen, Johanne M and Tanigawa, Yosuke and Qian, Junyang and Hastie, Trevor and Rivas, Manuel A and Tibshirani, Robert},
  title    = {Fast {Lasso} method for large-scale and ultrahigh-dimensional {Cox} model with applications to {UK} {Biobank}},
  journal  = {Biostatistics},
  year     = {2020},
  month    = sep,
  day      = {29},
  issn     = {1468-4357},
  doi      = {10.1093/biostatistics/kxaa038},
  abstract = {We develop a scalable and highly efficient algorithm to fit a Cox proportional hazard model by maximizing the $L^1$-regularized (Lasso) partial likelihood function, based on the Batch Screening Iterative Lasso (BASIL) method developed in Qian and others (2019). Our algorithm is particularly suitable for large-scale and high-dimensional data that do not fit in the memory. The output of our algorithm is the full Lasso path, the parameter estimates at all predefined regularization parameters, as well as their validation accuracy measured using the concordance index (C-index) or the validation deviance. To demonstrate the effectiveness of our algorithm, we analyze a large genotype-survival time dataset across 306 disease outcomes from the UK Biobank (Sudlow and others, 2015). We provide a publicly available implementation of the proposed approach for genetics data on top of the PLINK2 package and name it snpnet-Cox.},
}