% Bibliography database: eight journal articles (keys 114, 143, 121, 119, 76, 56, 57, 44).
%
% Conventions used in this file:
%   - one field per line; citation keys are the numeric IDs of the exporting system
%     and must stay as they are so existing \cite commands keep working;
%   - month uses the standard three-letter macros (unquoted); when the export also
%     carried a day-of-month it is kept in the non-standard "day" field, which
%     classic styles ignore;
%   - page ranges use "--";
%   - titles carry no trailing period (the style adds punctuation), and words that
%     must keep their capitalisation are wrapped in braces;
%   - journal names are stored in full;
%   - keywords are MeSH headings exactly as exported. Some headings contain a comma
%     themselves (e.g. "Diabetes Mellitus, Type 2"), so do not split on commas blindly.

@article{114,
  author   = {Sinnott-Armstrong, Nasa and Tanigawa, Yosuke and Amar, David and Mars, Nina and Benner, Christian and Aguirre, Matthew and Venkataraman, Guhan Ram and Wainberg, Michael and Ollila, Hanna M and Kiiskinen, Tuomo and Havulinna, Aki S and Pirruccello, James P and Qian, Junyang and Shcherbina, Anna and Rodriguez, Fatima and Assimes, Themistocles L and Agarwala, Vineeta and Tibshirani, Robert and Hastie, Trevor and Ripatti, Samuli and Pritchard, Jonathan K and Daly, Mark J and Rivas, Manuel A},
  title    = {Genetics of 35 blood and urine biomarkers in the {UK} {Biobank}},
  journal  = {Nature Genetics},
  volume   = {53},
  pages    = {185--194},
  year     = {2021},
  month    = feb,
  issn     = {1546-1718},
  doi      = {10.1038/s41588-020-00757-z},
  keywords = {Biological Specimen Banks, Biomarkers, Cardiovascular Diseases, Diabetes Mellitus, Type 2, DNA Copy Number Variations, Genetic Pleiotropy, HLA Antigens, Humans, Linkage Disequilibrium, Liver-Specific Organic Anion Transporter 1, Mendelian Randomization Analysis, Polymorphism, Single Nucleotide, Proteins, Renal Insufficiency, Chronic, Serine Endopeptidases, United Kingdom},
  abstract = {Clinical laboratory tests are a critical component of the continuum of care. We evaluate the genetic basis of 35 blood and urine laboratory measurements in the UK Biobank (n = 363,228 individuals). We identify 1,857 loci associated with at least one trait, containing 3,374 fine-mapped associations and additional sets of large-effect ($>$0.1 s.d.) protein-altering, human leukocyte antigen (HLA) and copy number variant (CNV) associations. Through Mendelian randomization (MR) analysis, we discover 51 causal relationships, including previously known agonistic effects of urate on gout and cystatin C on stroke. Finally, we develop polygenic risk scores (PRSs) for each biomarker and build {\textquoteleft}multi-PRS{\textquoteright} models for diseases using 35 PRSs simultaneously, which improved chronic kidney disease, type 2 diabetes, gout and alcoholic cirrhosis genetic risk stratification in an independent dataset (FinnGen; n = 135,500) relative to single-disease PRSs. Together, our results delineate the genetic basis of biomarkers and their causal influences on diseases and improve genetic risk stratification for common diseases.},
}

@article{143,
  author   = {de Goede, Olivia M and Nachun, Daniel C and Ferraro, Nicole M and Gloudemans, Michael J and Rao, Abhiram S and Smail, Craig and Eulalio, Tiffany Y and Aguet, Francois and Ng, Bernard and Xu, Jishu and Barbeira, Alvaro N and Castel, Stephane E and Kim-Hellmuth, Sarah and Park, YoSon and Scott, Alexandra J and Strober, Benjamin J and Brown, Christopher D and Wen, Xiaoquan and Hall, Ira M and Battle, Alexis and Lappalainen, Tuuli and Im, Hae Kyung and Ardlie, Kristin G and Mostafavi, Sara and Quertermous, Thomas and Kirkegaard, Karla and Montgomery, Stephen B},
  title    = {Population-scale tissue transcriptomics maps long non-coding {RNAs} to complex disease},
  journal  = {Cell},
  volume   = {184},
  pages    = {2633--2648.e19},
  year     = {2021},
  month    = may,
  day      = {13},
  issn     = {1097-4172},
  doi      = {10.1016/j.cell.2021.03.050},
  keywords = {Coronary Artery Disease, Diabetes Mellitus, Type 1, Diabetes Mellitus, Type 2, Disease, Gene Expression Profiling, Genetic Variation, Humans, Inflammatory Bowel Diseases, Multifactorial Inheritance, Organ Specificity, Population, Quantitative Trait Loci, RNA, Long Noncoding, Transcriptome},
  abstract = {Long non-coding RNA (lncRNA) genes have well-established and important impacts on molecular and cellular functions. However, among the thousands of lncRNA genes, it is still a major challenge to identify the subset with disease or trait relevance. To systematically characterize these lncRNA genes, we used Genotype Tissue Expression (GTEx) project v8 genetic and multi-tissue transcriptomic data to profile the expression, genetic regulation, cellular contexts, and trait associations of 14,100 lncRNA genes across 49 tissues for 101 distinct complex genetic traits. Using these approaches, we identified 1,432 lncRNA gene-trait associations, 800 of which were not explained by stronger effects of neighboring protein-coding genes. This included associations between lncRNA quantitative trait loci and inflammatory bowel disease, type 1 and type 2 diabetes, and coronary artery disease, as well as rare variant associations to body mass index.},
}

@article{121,
  author   = {Qian, Junyang and Tanigawa, Yosuke and Du, Wenfei and Aguirre, Matthew and Chang, Chris and Tibshirani, Robert and Rivas, Manuel A and Hastie, Trevor},
  title    = {A fast and scalable framework for large-scale and ultrahigh-dimensional sparse regression with application to the {UK} {Biobank}},
  journal  = {PLoS Genetics},
  volume   = {16},
  pages    = {e1009141},
  year     = {2020},
  month    = oct,
  issn     = {1553-7404},
  doi      = {10.1371/journal.pgen.1009141},
  keywords = {Algorithms, Asthma, Biological Specimen Banks, Body Height, Body Mass Index, Cholesterol, Cohort Studies, Genetics, Population, Genome-Wide Association Study, Genotype, Humans, Logistic Models, Phenotype, Polymorphism, Single Nucleotide, Proportional Hazards Models, United Kingdom},
  abstract = {The UK Biobank is a very large, prospective population-based cohort study across the United Kingdom. It provides unprecedented opportunities for researchers to investigate the relationship between genotypic information and phenotypes of interest. Multiple regression methods, compared with genome-wide association studies (GWAS), have already been showed to greatly improve the prediction performance for a variety of phenotypes. In the high-dimensional settings, the lasso, since its first proposal in statistics, has been proved to be an effective method for simultaneous variable selection and estimation. However, the large-scale and ultrahigh dimension seen in the UK Biobank pose new challenges for applying the lasso method, as many existing algorithms and their implementations are not scalable to large applications. In this paper, we propose a computational framework called batch screening iterative lasso (BASIL) that can take advantage of any existing lasso solver and easily build a scalable solution for very large data, including those that are larger than the memory size. We introduce snpnet, an R package that implements the proposed algorithm on top of glmnet and optimizes for single nucleotide polymorphism (SNP) datasets. It currently supports l1-penalized linear model, logistic regression, Cox model, and also extends to the elastic net with l1/l2 penalty. We demonstrate results on the UK Biobank dataset, where we achieve competitive predictive performance for all four phenotypes considered (height, body mass index, asthma, high cholesterol) using only a small fraction of the variants compared with other established polygenic risk score methods.},
}

% NOTE(review): the entry below spells two authors differently from entries 114 and 121
% ("Qiang, Junyang" here vs "Qian, Junyang" there; "Chang, Christopher" here vs
% "Chang, Chris" in 121). They look like the same people, but the spellings are kept
% exactly as exported -- TODO confirm against the publisher record before harmonising.
% The export also carries no volume or page numbers for this entry; add them if the
% article has since been assigned to an issue.
@article{119,
  author   = {Li, Ruilin and Chang, Christopher and Justesen, Johanne M and Tanigawa, Yosuke and Qiang, Junyang and Hastie, Trevor and Rivas, Manuel A and Tibshirani, Robert},
  title    = {Fast {Lasso} method for large-scale and ultrahigh-dimensional {Cox} model with applications to {UK} {Biobank}},
  journal  = {Biostatistics},
  year     = {2020},
  month    = sep,
  day      = {29},
  issn     = {1468-4357},
  doi      = {10.1093/biostatistics/kxaa038},
  abstract = {We develop a scalable and highly efficient algorithm to fit a Cox proportional hazard model by maximizing the $L^1$-regularized (Lasso) partial likelihood function, based on the Batch Screening Iterative Lasso (BASIL) method developed in Qian and others (2019). Our algorithm is particularly suitable for large-scale and high-dimensional data that do not fit in the memory. The output of our algorithm is the full Lasso path, the parameter estimates at all predefined regularization parameters, as well as their validation accuracy measured using the concordance index (C-index) or the validation deviance. To demonstrate the effectiveness of our algorithm, we analyze a large genotype-survival time dataset across 306 disease outcomes from the UK Biobank (Sudlow and others, 2015). We provide a publicly available implementation of the proposed approach for genetics data on top of the PLINK2 package and name it snpnet-Cox.},
}

@article{76,
  author   = {Unlu, Gokhan and Qi, Xinzi and Gamazon, Eric R and Melville, David B and Patel, Nisha and Rushing, Amy R and Hashem, Mais and Al-Faifi, Abdullah and Chen, Rui and Li, Bingshan and Cox, Nancy J and Alkuraya, Fowzan S and Knapik, Ela W},
  title    = {Phenome-based approach identifies {RIC1}-linked {Mendelian} syndrome through zebrafish models, biobank associations and clinical studies},
  journal  = {Nature Medicine},
  volume   = {26},
  pages    = {98--109},
  year     = {2020},
  month    = jan,
  issn     = {1546-170X},
  doi      = {10.1038/s41591-019-0705-y},
  keywords = {Abnormalities, Multiple, Animals, Behavior, Animal, Biological Specimen Banks, Chondrocytes, Disease Models, Animal, Extracellular Matrix, Fibroblasts, Guanine Nucleotide Exchange Factors, Humans, Models, Biological, Musculoskeletal System, Osteogenesis, Phenomics, Phenotype, Procollagen, Protein Transport, Secretory Pathway, Syndrome, Zebrafish, Zebrafish Proteins},
  abstract = {Discovery of genotype-phenotype relationships remains a major challenge in clinical medicine. Here, we combined three sources of phenotypic data to uncover a new mechanism for rare and common diseases resulting from collagen secretion deficits. Using a zebrafish genetic screen, we identified the ric1 gene as being essential for skeletal biology. Using a gene-based phenome-wide association study (PheWAS) in the EHR-linked BioVU biobank, we show that reduced genetically determined expression of RIC1 is associated with musculoskeletal and dental conditions. Whole-exome sequencing identified individuals homozygous-by-descent for a rare variant in RIC1 and, through a guided clinical re-evaluation, it was discovered that they share signs with the BioVU-associated phenome. We named this new Mendelian syndrome CATIFA (cleft lip, cataract, tooth abnormality, intellectual disability, facial dysmorphism, attention-deficit hyperactivity disorder) and revealed further disease mechanisms. This gene-based, PheWAS-guided approach can accelerate the discovery of clinically relevant disease phenome and associated biological mechanisms.},
}

@article{56,
  author   = {Unlu, Gokhan and Gamazon, Eric R and Qi, Xinzi and Levic, Daniel S and Bastarache, Lisa and Denny, Joshua C and Roden, Dan M and Mayzus, Ilya and Breyer, Max and Zhong, Xue and Konkashbaev, Anuar I and Rzhetsky, Andrey and Knapik, Ela W and Cox, Nancy J},
  title    = {{GRIK5} Genetically Regulated Expression Associated with Eye and Vascular Phenomes: Discovery through Iteration among Biobanks, Electronic Health Records, and Zebrafish},
  journal  = {American Journal of Human Genetics},
  volume   = {104},
  pages    = {503--519},
  year     = {2019},
  month    = mar,
  day      = {7},
  issn     = {1537-6605},
  doi      = {10.1016/j.ajhg.2019.01.017},
  abstract = {Although the use of model systems for studying the mechanism of mutations that have a large effect is common, we highlight here the ways that zebrafish-model-system studies of a gene, GRIK5, that contributes to the polygenic liability to develop eye diseases have helped to illuminate a mechanism that implicates vascular biology in eye disease. A gene-expression prediction derived from a reference transcriptome panel applied to BioVU, a large electronic health record (EHR)-linked biobank at Vanderbilt University Medical Center, implicated reduced GRIK5 expression in diverse eye diseases. We tested the function of GRIK5 by depletion of its ortholog in zebrafish, and we observed reduced blood vessel numbers and integrity in the eye and increased vascular permeability. Analyses of EHRs in $>$2.6 million Vanderbilt subjects revealed significant comorbidity of eye and vascular diseases (relative risks 2-15); this comorbidity was confirmed in 150 million individuals from a large insurance claims dataset. Subsequent studies in $>$60,000 genotyped BioVU participants confirmed the association of reduced genetically predicted expression of GRIK5 with comorbid vascular and eye diseases. Our studies pioneer an approach that allows a rapid iteration of the discovery of gene-phenotype relationships to the primary genetic mechanism contributing to the pathophysiology of human disease. Our findings also add dimension to the understanding of the biology driven by glutamate receptors such as GRIK5 (also referred to as GLUK5 in protein form) and to mechanisms contributing to human eye diseases.},
}

@article{57,
  author   = {Wainberg, Michael and Sinnott-Armstrong, Nasa and Mancuso, Nicholas and Barbeira, Alvaro N and Knowles, David A and Golan, David and Ermel, Raili and Ruusalepp, Arno and Quertermous, Thomas and Hao, Ke and Bj{\"o}rkegren, Johan L M and Im, Hae Kyung and Pasaniuc, Bogdan and Rivas, Manuel A and Kundaje, Anshul},
  title    = {Opportunities and challenges for transcriptome-wide association studies},
  journal  = {Nature Genetics},
  volume   = {51},
  pages    = {592--599},
  year     = {2019},
  month    = apr,
  issn     = {1546-1718},
  doi      = {10.1038/s41588-019-0385-z},
  keywords = {Crohn Disease, Genetic Predisposition to Disease, Genetic Variation, Genome-Wide Association Study, Humans, Lipoproteins, LDL, Quantitative Trait Loci, Schizophrenia, Transcriptome},
  abstract = {Transcriptome-wide association studies (TWAS) integrate genome-wide association studies (GWAS) and gene expression datasets to identify gene-trait associations. In this Perspective, we explore properties of TWAS as a potential approach to prioritize causal genes at GWAS loci, by using simulations and case studies of literature-curated candidate causal genes for schizophrenia, low-density-lipoprotein cholesterol and Crohn{\textquoteright}s disease. We explore risk loci where TWAS accurately prioritizes the likely causal gene as well as loci where TWAS prioritizes multiple genes, some likely to be non-causal, owing to sharing of expression quantitative trait loci (eQTL). TWAS is especially prone to spurious prioritization with expression data from non-trait-related tissues or cell types, owing to substantial cross-cell-type variation in expression levels and eQTL strengths. Nonetheless, TWAS prioritizes candidate causal genes more accurately than simple baselines. We suggest best practices for causal-gene prioritization with TWAS and discuss future opportunities for improvement. Our results showcase the strengths and limitations of using eQTL datasets to determine causal genes at GWAS loci.},
}

@article{44,
  author   = {Westra, Harm-Jan and Mart{\'\i}nez-Bonet, Marta and Onengut-Gumuscu, Suna and Lee, Annette and Luo, Yang and Teslovich, Nikola and Worthington, Jane and Martin, Javier and Huizinga, Tom and Klareskog, Lars and Rantapaa-Dahlqvist, Solbritt and Chen, Wei-Min and Quinlan, Aaron and Todd, John A and Eyre, Steve and Nigrovic, Peter A and Gregersen, Peter K and Rich, Stephen S and Raychaudhuri, Soumya},
  title    = {Fine-mapping and functional studies highlight potential causal variants for rheumatoid arthritis and type 1 diabetes},
  journal  = {Nature Genetics},
  volume   = {50},
  pages    = {1366--1374},
  year     = {2018},
  month    = oct,
  issn     = {1546-1718},
  doi      = {10.1038/s41588-018-0216-7},
  keywords = {Alleles, Arthritis, Rheumatoid, Case-Control Studies, CD28 Antigens, Chromosome Mapping, CTLA-4 Antigen, Diabetes Mellitus, Type 1, Gene Frequency, Genetic Loci, Genetic Predisposition to Disease, Genome-Wide Association Study, Humans, Jurkat Cells, Mutation, Polymorphism, Single Nucleotide, Quantitative Trait Loci, RNA, Long Noncoding, Tumor Necrosis Factor alpha-Induced Protein 3},
  abstract = {To define potentially causal variants for autoimmune disease, we fine-mapped 76 rheumatoid arthritis (11,475 cases, 15,870 controls) and type 1 diabetes loci (9,334 cases, 11,111 controls). After sequencing 799 1-kilobase regulatory (H3K4me3) regions within these loci in 568 individuals, we observed accurate imputation for 89\% of common variants. We defined credible sets of $\leq$5 causal variants at 5 rheumatoid arthritis and 10 type 1 diabetes loci. We identified potentially causal missense variants at DNASE1L3, PTPN22, SH2B3, and TYK2, and noncoding variants at MEG3, CD28-CTLA4, and IL2RA. We also identified potential candidate causal variants at SIRPG and TNFAIP3. Using functional assays, we confirmed allele-specific protein binding and differential enhancer activity for three variants: the CD28-CTLA4 rs117701653 SNP, MEG3 rs34552516 indel, and TNFAIP3 rs35926684 indel.},
}