% 144: Sakaue et al., Nat Genet 53 (2021) -- cross-population atlas of genetic associations.
@article{144,
  title    = {A cross-population atlas of genetic associations for 220 human phenotypes},
  journal  = {Nat Genet},
  volume   = {53},
  year     = {2021},
  month    = oct,
  pages    = {1415--1424},
  abstract = {

Current genome-wide association studies do not yet capture sufficient diversity in populations and scope of phenotypes. To expand an atlas of genetic associations in non-European populations, we conducted 220 deep-phenotype genome-wide association studies (diseases, biomarkers and medication usage) in BioBank Japan (n = 179,000), by incorporating past medical history and text-mining of electronic medical records. Meta-analyses with the UK Biobank and FinnGen (n = 628,000) identified ~5,000 new loci, which improved the resolution of the genomic map of human traits. This atlas elucidated the landscape of pleiotropy as represented by the major histocompatibility complex locus, where we conducted HLA fine-mapping. Finally, we performed statistical decomposition of matrices of phenome-wide summary statistics, and identified latent genetic components, which pinpointed responsible variants and biological mechanisms underlying current disease classifications across populations. The decomposed components enabled genetically informed subtyping of similar diseases (for example, allergic diseases). Our study suggests a potential avenue for hypothesis-free re-investigation of human diseases through genetics.

},
  keywords = {ABO Blood-Group System, Biological Specimen Banks, Genetic Association Studies, Genetic Loci, Genetic Pleiotropy, Genetic Predisposition to Disease, Genome-Wide Association Study, Humans, Major Histocompatibility Complex, Meta-Analysis as Topic, Mutation, Phenotype},
  issn     = {1546-1718},
  doi      = {10.1038/s41588-021-00931-x},
  author   = {Sakaue, Saori and Kanai, Masahiro and Tanigawa, Yosuke and Karjalainen, Juha and Kurki, Mitja and Koshiba, Seizo and Narita, Akira and Konuma, Takahiro and Yamamoto, Kenichi and Akiyama, Masato and Ishigaki, Kazuyoshi and Suzuki, Akari and Suzuki, Ken and Obara, Wataru and Yamaji, Ken and Takahashi, Kazuhisa and Asai, Satoshi and Takahashi, Yasuo and Suzuki, Takao and Shinozaki, Nobuaki and Yamaguchi, Hiroki and Minami, Shiro and Murayama, Shigeo and Yoshimori, Kozo and Nagayama, Satoshi and Obata, Daisuke and Higashiyama, Masahiko and Masumoto, Akihide and Koretsune, Yukihiro and Ito, Kaoru and Terao, Chikashi and Yamauchi, Toshimasa and Komuro, Issei and Kadowaki, Takashi and Tamiya, Gen and Yamamoto, Masayuki and Nakamura, Yusuke and Kubo, Michiaki and Murakami, Yoshinori and Yamamoto, Kazuhiko and Kamatani, Yoichiro and Palotie, Aarno and Rivas, Manuel A and Daly, Mark J and Matsuda, Koichi and Okada, Yukinori}
}

% 132: Stern et al., Am J Hum Genet 108 (2021) -- selection on correlated polygenic traits.
@article{132,
  title    = {Disentangling selection on genetically correlated polygenic traits via whole-genome genealogies},
  journal  = {Am J Hum Genet},
  volume   = {108},
  year     = {2021},
  month    = feb,
  day      = {4},
  pages    = {219--239},
  abstract = {

We present a full-likelihood method to infer polygenic adaptation from DNA sequence variation and GWAS summary statistics to quantify recent transient directional selection acting on a complex trait. Through simulations of polygenic trait architecture evolution and GWASs, we show the method substantially improves power over current methods. We examine the robustness of the method under stratification, uncertainty and bias in marginal effects, uncertainty in the causal SNPs, allelic heterogeneity, negative selection, and low GWAS sample size. The method can quantify selection acting on correlated traits, controlling for pleiotropy even among traits with strong genetic correlation (|r|=80\%) while retaining high power to attribute selection to the causal trait. When the causal trait is excluded from analysis, selection is attributed to its closest proxy. We discuss limitations of the method, cautioning against strongly causal interpretations of the results, and the possibility of undetectable gene-by-environment (GxE) interactions. We apply the method to 56 human polygenic traits, revealing signals of directional selection on pigmentation, life history, glycated hemoglobin (HbA1c), and other traits. We also conduct joint testing of 137 pairs of genetically correlated traits, revealing widespread correlated response acting on these traits (2.6-fold enrichment, p = 1.5~{\texttimes} 10$^{-7}$). Signs of selection on some traits previously reported as adaptive (e.g., educational attainment and hair color) are largely attributable to correlated response (p = 2.9~{\texttimes} 10$^{-6}$ and 1.7~{\texttimes} 10$^{-4}$, respectively). Lastly, our joint test shows antagonistic selection has increased type 2 diabetes risk and decreased HbA1c (p = 1.5~{\texttimes} 10$^{-5}$).

},
  keywords = {Computer Simulation, Diabetes Mellitus, Type 2, Evolution, Molecular, Gene-Environment Interaction, Genetic Heterogeneity, Genetic Pleiotropy, Genome, Human, Genome-Wide Association Study, Glycated Hemoglobin A, Humans, Models, Genetic, Multifactorial Inheritance, Phenotype, Polymorphism, Single Nucleotide, Sample Size, Selection, Genetic},
  issn     = {1537-6605},
  doi      = {10.1016/j.ajhg.2020.12.005},
  author   = {Stern, Aaron J and Speidel, Leo and Zaitlen, Noah A and Nielsen, Rasmus}
}

% 130: Nayar et al., Biol Psychiatry 89 (2021) -- ASD polygenic burden and the broad autism phenotype.
@article{130,
  title    = {Elevated Polygenic Burden for Autism Spectrum Disorder Is Associated With the Broad Autism Phenotype in Mothers of Individuals With Autism Spectrum Disorder},
  journal  = {Biol Psychiatry},
  volume   = {89},
  year     = {2021},
  month    = mar,
  day      = {1},
  pages    = {476--485},
  abstract = {

BACKGROUND: Autism spectrum disorder (ASD) is a multifactorial neurodevelopmental disorder that encompasses a complex and heterogeneous set of traits. Subclinical traits that mirror the core features of ASD, referred to as the broad autism phenotype (BAP), have been documented repeatedly in unaffected relatives and are believed to reflect underlying genetic liability to ASD. The BAP may help inform the etiology of ASD by allowing the stratification of families into more phenotypically and etiologically homogeneous subgroups. This study explores polygenic scores related to the BAP.

METHODS: Phenotypic and genotypic information were obtained from 2614 trios from the Simons Simplex Collection. Polygenic scores of ASD (ASD-PGSs) were generated across the sample to determine the shared genetic overlap between the BAP and ASD. Maternal and paternal ASD-PGSs were explored in relation to BAP traits and their child{\textquoteright}s ASD symptomatology.

RESULTS: Maternal pragmatic language was related to child{\textquoteright}s social communicative atypicalities. In fathers, rigid personality was related to increased repetitive behaviors in children. Maternal (but not paternal) ASD-PGSs were related to the pragmatic language and rigid BAP domains.

CONCLUSIONS: Associations emerged between parent and child phenotypes, with more associations emerging in mothers than in fathers. ASD-PGS associations emerged with BAP in mothers only, highlighting the potential for a female protective factor, and implicating the polygenic etiology of ASD-related phenotypes in the BAP.

},
  keywords = {Autism Spectrum Disorder, Autistic Disorder, Child, Fathers, Female, Humans, Male, Mothers, Phenotype},
  issn     = {1873-2402},
  doi      = {10.1016/j.biopsych.2020.08.029},
  author   = {Nayar, Kritika and Sealock, Julia M and Maltman, Nell and Bush, Lauren and Cook, Edwin H and Davis, Lea K and Losh, Molly}
}

% 131: Amar et al., Nat Commun 12 (2021) -- cGAUGE causal discovery pipeline.
@article{131,
  title    = {Graphical analysis for phenome-wide causal discovery in genotyped population-scale biobanks},
  journal  = {Nat Commun},
  volume   = {12},
  year     = {2021},
  month    = jan,
  day      = {13},
  pages    = {350},
  abstract = {

Causal inference via Mendelian randomization requires making strong assumptions about horizontal pleiotropy, where genetic instruments are connected to the outcome not only through the exposure. Here, we present causal Graphical Analysis Using Genetics (cGAUGE), a pipeline that overcomes these limitations using instrument filters with provable properties. This is achievable by identifying conditional independencies while examining multiple traits. cGAUGE also uses ExSep (Exposure-based Separation), a novel test for the existence of causal pathways that does not require selecting instruments. In simulated data we illustrate how cGAUGE can reduce the empirical false discovery rate by up to 30\%, while retaining the majority of true discoveries. On 96 complex traits from 337,198 subjects from the UK Biobank, our results cover expected causal links and many new ones that were previously suggested by correlation-based observational studies. Notably, we identify multiple risk factors for cardiovascular disease, including red blood cell distribution width.

},
  keywords = {Biological Specimen Banks, Cardiovascular Diseases, Causality, Computer Simulation, Gene Regulatory Networks, Genetic Pleiotropy, Genetic Variation, Genome-Wide Association Study, Genotype, Humans, Mendelian Randomization Analysis, Models, Theoretical, Multifactorial Inheritance, Phenotype, Risk Factors},
  issn     = {2041-1723},
  doi      = {10.1038/s41467-020-20516-2},
  author   = {Amar, David and Sinnott-Armstrong, Nasa and Ashley, Euan A and Rivas, Manuel A}
}

% 146: whole-exome sequencing study of the epilepsies, Am J Hum Genet 108 (2021).
@article{146,
  title    = {Sub-genic intolerance, {ClinVar}, and the epilepsies: A whole-exome sequencing study of 29,165 individuals},
  journal  = {Am J Hum Genet},
  volume   = {108},
  year     = {2021},
  month    = jun,
  day      = {3},
  pages    = {965--982},
  abstract = {

Both mild and severe epilepsies are influenced by variants in the same genes, yet an explanation for the resulting phenotypic variation is unknown. As part of the ongoing Epi25 Collaboration, we performed a whole-exome sequencing analysis of 13,487 epilepsy-affected individuals and 15,678 control individuals. While prior Epi25 studies focused on gene-based collapsing analyses, we asked how the pattern of variation within genes differs by epilepsy type. Specifically, we compared the genetic architectures of severe developmental and epileptic encephalopathies (DEEs) and two generally less severe epilepsies, genetic generalized epilepsy and non-acquired focal epilepsy (NAFE). Our gene-based rare variant collapsing analysis used geographic ancestry-based clustering that included broader ancestries than previously possible and revealed novel associations. Using the missense intolerance ratio (MTR), we found that variants in DEE-affected individuals are in significantly more intolerant genic sub-regions than those in NAFE-affected individuals. Only previously reported pathogenic variants absent in available genomic datasets showed a significant burden in epilepsy-affected individuals compared with control individuals, and the ultra-rare pathogenic variants associated with DEE were located in more intolerant genic sub-regions than variants associated with non-DEE epilepsies. MTR filtering improved the yield of ultra-rare pathogenic variants in affected individuals compared with control individuals. Finally, analysis of variants in genes without a disease association revealed a significant burden of loss-of-function variants in the genes most intolerant to such variation, indicating additional epilepsy-risk genes yet to be discovered. Taken together, our study suggests that genic and sub-genic intolerance are critical characteristics for interpreting the effects of variation in genes that influence epilepsy.

},
  keywords = {Case-Control Studies, Cohort Studies, Epilepsy, Exome, Genetic Markers, Genetic Predisposition to Disease, Genetic Testing, Genetic Variation, Humans, Phenotype, Whole Exome Sequencing},
  issn     = {1537-6605},
  doi      = {10.1016/j.ajhg.2021.04.009},
  author   = {{Epi25 Collaborative}}
}

% 91: DeBoever et al., Am J Hum Genet 106 (2020) -- digital phenotyping for GWAS.
@article{91,
  title    = {Assessing Digital Phenotyping to Enhance Genetic Studies of Human Diseases},
  journal  = {Am J Hum Genet},
  volume   = {106},
  year     = {2020},
  month    = may,
  day      = {7},
  pages    = {611--622},
  abstract = {

Population-scale biobanks that combine genetic data and high-dimensional phenotyping for a large number of participants provide an exciting opportunity to perform genome-wide association studies (GWAS) to identify genetic variants associated with diverse quantitative traits and diseases. A major challenge for GWAS in population biobanks is ascertaining disease cases from heterogeneous data sources such as hospital records, digital questionnaire responses, or interviews. In this study, we use genetic parameters, including genetic correlation, to evaluate whether GWAS performed using cases in the UK Biobank ascertained from hospital records, questionnaire responses, and family history of disease implicate similar disease genetics across a range of effect sizes. We find that hospital record and questionnaire GWAS largely identify similar genetic effects for many complex phenotypes and that combining together both phenotyping methods improves power to detect genetic associations. We also show that family history GWAS using cases ascertained on family history of disease agrees with combined hospital record and questionnaire GWAS and that family history GWAS has better power to detect genetic associations for some phenotypes. Overall, this work demonstrates that digital phenotyping and unstructured phenotype data can be combined with structured data such as hospital records to identify cases for GWAS in biobanks and improve the ability of such studies to identify genetic associations.

},
  keywords = {Asthma, Databases, Factual, Disease, Female, Genetics, Medical, Genome-Wide Association Study, Genotype, Humans, Male, Neoplasms, Phenotype, United Kingdom},
  issn     = {1537-6605},
  doi      = {10.1016/j.ajhg.2020.03.007},
  author   = {DeBoever, Christopher and Tanigawa, Yosuke and Aguirre, Matthew and McInnes, Greg and Lavertu, Adam and Rivas, Manuel A}
}

% 85: Claussnitzer et al., Nature 577 (2020) -- review of human disease genetics.
@article{85,
  title    = {A brief history of human disease genetics},
  journal  = {Nature},
  volume   = {577},
  year     = {2020},
  month    = jan,
  pages    = {179--189},
  abstract = {

A primary goal of human genetics is to identify DNA sequence variants that influence biomedical traits, particularly those related to the onset and progression of human disease. Over the past 25 years, progress in realizing this objective has been transformed by advances in technology, foundational genomic resources and analytical tools, and by access to vast amounts of genotype and phenotype data. Genetic discoveries have substantially improved our understanding of the mechanisms responsible for many rare and common diseases and driven development of novel preventative and therapeutic strategies. Medical innovation will increasingly focus on delivering care tailored to individual patterns of genetic predisposition.

},
  keywords = {Animals, Genetic Testing, Genetic Variation, Genomics, Genotype, Humans, Phenotype, Rare Diseases},
  issn     = {1476-4687},
  doi      = {10.1038/s41586-019-1879-7},
  author   = {Claussnitzer, Melina and Cho, Judy H and Collins, Rory and Cox, Nancy J and Dermitzakis, Emmanouil T and Hurles, Matthew E and Kathiresan, Sekar and Kenny, Eimear E and Lindgren, Cecilia M and MacArthur, Daniel G and North, Kathryn N and Plon, Sharon E and Rehm, Heidi L and Risch, Neil and Rotimi, Charles N and Shendure, Jay and Soranzo, Nicole and McCarthy, Mark I}
}

% 99: Li et al., Nat Genet 52 (2020) -- STAAR rare-variant association test.
@article{99,
  title    = {Dynamic incorporation of multiple in silico functional annotations empowers rare variant association analysis of large whole-genome sequencing studies at scale},
  journal  = {Nat Genet},
  volume   = {52},
  year     = {2020},
  month    = sep,
  pages    = {969--983},
  abstract = {

Large-scale whole-genome sequencing studies have enabled the analysis of rare variants (RVs) associated with complex phenotypes. Commonly used RV association tests have limited scope to leverage variant functions. We propose STAAR (variant-set test for association using annotation information), a scalable and powerful RV association test method that effectively incorporates both variant categories and multiple complementary annotations using a dynamic weighting scheme. For the latter, we introduce {\textquoteright}annotation principal components{\textquoteright}, multidimensional summaries of in silico variant annotations. STAAR accounts for population structure and relatedness and is scalable for analyzing very large cohort and biobank whole-genome sequencing studies of continuous and dichotomous traits. We applied STAAR to identify RVs associated with four lipid traits in 12,316 discovery and 17,822 replication samples from the Trans-Omics for Precision Medicine Program. We discovered and replicated new RV associations, including disruptive missense RVs of NPC1L1 and an intergenic region near APOC1P1 associated with low-density lipoprotein cholesterol.

},
  keywords = {Cholesterol, LDL, Computer Simulation, Genetic Predisposition to Disease, Genetic Variation, Genome, Genome-Wide Association Study, Humans, Models, Genetic, Molecular Sequence Annotation, Phenotype, Whole Genome Sequencing},
  issn     = {1546-1718},
  doi      = {10.1038/s41588-020-0676-4},
  author   = {Li, Xihao and Li, Zilin and Zhou, Hufeng and Gaynor, Sheila M and Liu, Yaowu and Chen, Han and Sun, Ryan and Dey, Rounak and Arnett, Donna K and Aslibekyan, Stella and Ballantyne, Christie M and Bielak, Lawrence F and Blangero, John and Boerwinkle, Eric and Bowden, Donald W and Broome, Jai G and Conomos, Matthew P and Correa, Adolfo and Cupples, L Adrienne and Curran, Joanne E and Freedman, Barry I and Guo, Xiuqing and Hindy, George and Irvin, Marguerite R and Kardia, Sharon L R and Kathiresan, Sekar and Khan, Alyna T and Kooperberg, Charles L and Laurie, Cathy C and Liu, X Shirley and Mahaney, Michael C and Manichaikul, Ani W and Martin, Lisa W and Mathias, Rasika A and McGarvey, Stephen T and Mitchell, Braxton D and Montasser, May E and Moore, Jill E and Morrison, Alanna C and O{\textquoteright}Connell, Jeffrey R and Palmer, Nicholette D and Pampana, Akhil and Peralta, Juan M and Peyser, Patricia A and Psaty, Bruce M and Redline, Susan and Rice, Kenneth M and Rich, Stephen S and Smith, Jennifer A and Tiwari, Hemant K and Tsai, Michael Y and Vasan, Ramachandran S and Wang, Fei Fei and Weeks, Daniel E and Weng, Zhiping and Wilson, James G and Yanek, Lisa R and Neale, Benjamin M and Sunyaev, Shamil R and Abecasis, Gon{\c c}alo R and Rotter, Jerome I and Willer, Cristen J and Peloso, Gina M and Natarajan, Pradeep and Lin, Xihong}
}

% 82: Zhong et al., Genet Med 22 (2020) -- EHR phenotypes and genetically regulated CFTR expression.
@article{82,
  title    = {Electronic health record phenotypes associated with genetically regulated expression of {CFTR} and application to cystic fibrosis},
  journal  = {Genet Med},
  volume   = {22},
  year     = {2020},
  month    = jul,
  pages    = {1191--1200},
  abstract = {

PURPOSE: The increasing use of electronic health records (EHRs) and biobanks offers unique opportunities to study Mendelian diseases. We described a novel approach to summarize clinical manifestations from patient EHRs into phenotypic evidence for cystic fibrosis (CF) with potential to alert unrecognized patients of the disease.

METHODS: We estimated genetically predicted expression (GReX) of cystic fibrosis transmembrane conductance regulator (CFTR) and tested for association with clinical diagnoses in the Vanderbilt University biobank (N = 9142 persons of European descent with 71 cases of CF). The top associated EHR phenotypes were assessed in combination as a phenotype risk score (PheRS) for discriminating CF case status in an additional 2.8 million patients from Vanderbilt University Medical Center (VUMC) and 125,305 adult patients including 25,314 CF cases from MarketScan, an independent external cohort.

RESULTS: GReX of CFTR was associated with EHR phenotypes consistent with CF. PheRS constructed using the EHR phenotypes and weights discovered by the genetic associations improved discriminative power for CF over the initially proposed PheRS in both VUMC and MarketScan.

CONCLUSION: Our study demonstrates the power of EHRs for clinical description of CF and the benefits of using a genetics-informed weighting scheme in construction of a phenotype risk score. This research may find broad applications for phenomic studies of Mendelian disease genes.

},
  keywords = {Adult, Cystic Fibrosis, Cystic Fibrosis Transmembrane Conductance Regulator, Electronic Health Records, Humans, Mutation, Phenotype},
  issn     = {1530-0366},
  doi      = {10.1038/s41436-020-0786-5},
  author   = {Zhong, Xue and Yin, Zhijun and Jia, Gengjie and Zhou, Dan and Wei, Qiang and Faucon, Annika and Evans, Patrick and Gamazon, Eric R and Li, Bingshan and Tao, Ran and Rzhetsky, Andrey and Bastarache, Lisa and Cox, Nancy J}
}

% 121: Qian et al., PLoS Genet 16 (2020) -- BASIL / snpnet sparse regression framework.
@article{121,
  title    = {A fast and scalable framework for large-scale and ultrahigh-dimensional sparse regression with application to the {UK} {Biobank}},
  journal  = {PLoS Genet},
  volume   = {16},
  year     = {2020},
  month    = oct,
  pages    = {e1009141},
  abstract = {

The UK Biobank is a very large, prospective population-based cohort study across the United Kingdom. It provides unprecedented opportunities for researchers to investigate the relationship between genotypic information and phenotypes of interest. Multiple regression methods, compared with genome-wide association studies (GWAS), have already been shown to greatly improve the prediction performance for a variety of phenotypes. In the high-dimensional settings, the lasso, since its first proposal in statistics, has been proved to be an effective method for simultaneous variable selection and estimation. However, the large-scale and ultrahigh dimension seen in the UK Biobank pose new challenges for applying the lasso method, as many existing algorithms and their implementations are not scalable to large applications. In this paper, we propose a computational framework called batch screening iterative lasso (BASIL) that can take advantage of any existing lasso solver and easily build a scalable solution for very large data, including those that are larger than the memory size. We introduce snpnet, an R package that implements the proposed algorithm on top of glmnet and optimizes for single nucleotide polymorphism (SNP) datasets. It currently supports l1-penalized linear model, logistic regression, Cox model, and also extends to the elastic net with l1/l2 penalty. We demonstrate results on the UK Biobank dataset, where we achieve competitive predictive performance for all four phenotypes considered (height, body mass index, asthma, high cholesterol) using only a small fraction of the variants compared with other established polygenic risk score methods.

},
  keywords = {Algorithms, Asthma, Biological Specimen Banks, Body Height, Body Mass Index, Cholesterol, Cohort Studies, Genetics, Population, Genome-Wide Association Study, Genotype, Humans, Logistic Models, Phenotype, Polymorphism, Single Nucleotide, Proportional Hazards Models, United Kingdom},
  issn     = {1553-7404},
  doi      = {10.1371/journal.pgen.1009141},
  author   = {Qian, Junyang and Tanigawa, Yosuke and Du, Wenfei and Aguirre, Matthew and Chang, Chris and Tibshirani, Robert and Rivas, Manuel A and Hastie, Trevor}
}

% 75: Rochtus et al., Epilepsia 61 (2020) -- exome analysis in a pediatric epilepsy cohort.
@article{75,
  title    = {Genetic diagnoses in epilepsy: The impact of dynamic exome analysis in a pediatric cohort},
  journal  = {Epilepsia},
  volume   = {61},
  year     = {2020},
  month    = feb,
  pages    = {249--258},
  abstract = {

OBJECTIVE: We evaluated the yield of systematic analysis and/or reanalysis of whole exome sequencing (WES) data from a cohort of well-phenotyped pediatric patients with epilepsy and suspected but previously undetermined genetic etiology.

METHODS: We identified and phenotyped 125 participants with pediatric epilepsy. Etiology was unexplained at the time of enrollment despite clinical testing, which included chromosomal microarray (57 patients), epilepsy gene panel (n = 48), both (n = 28), or WES (n = 8). Clinical epilepsy diagnoses included developmental and epileptic encephalopathy (DEE), febrile infection-related epilepsy syndrome, Rasmussen encephalitis, and other focal and generalized epilepsies. We analyzed WES data and compared the yield in participants with and without prior clinical genetic testing.

RESULTS: Overall, we identified pathogenic or likely pathogenic variants in 40\% (50/125) of our study participants. Nine patients with DEE had genetic variants in recently published genes that had not been recognized as epilepsy-related at the time of clinical testing (FGF12, GABBR1, GABBR2, ITPA, KAT6A, PTPN23, RHOBTB2, SATB2), and eight patients had genetic variants in candidate epilepsy genes (CAMTA1, FAT3, GABRA6, HUWE1, PTCHD1). Ninety participants had concomitant or subsequent clinical genetic testing, which was ultimately explanatory for 26\% (23/90). Of the 67 participants whose molecular diagnoses were "unsolved" through clinical genetic testing, we identified pathogenic or likely pathogenic variants in 17 (25\%).

SIGNIFICANCE: Our data argue for early consideration of WES with iterative reanalysis for patients with epilepsy, particularly those with DEE or epilepsy with intellectual disability. Rigorous analysis of WES data of well-phenotyped patients with epilepsy leads to a broader understanding of gene-specific phenotypic spectra as well as candidate disease gene identification. We illustrate the dynamic nature of genetic diagnosis over time, with analysis and in some cases reanalysis of exome data leading to the identification of disease-associated variants among participants with previously nondiagnostic results from a variety of clinical testing strategies.

},
  keywords = {Adolescent, Adult, Age of Onset, Brain Diseases, Child, Child, Preschool, Chromosomes, Human, Cohort Studies, Epilepsy, Epilepsy, Generalized, Exome, Female, Genetic Testing, Genetic Variation, Humans, Infant, Male, Microarray Analysis, Phenotype, Whole Exome Sequencing, Young Adult},
  issn     = {1528-1167},
  doi      = {10.1111/epi.16427},
  author   = {Rochtus, Anne and Olson, Heather E and Smith, Lacey and Keith, Louisa G and El Achkar, Christelle and Taylor, Alan and Mahida, Sonal and Park, Meredith and Kelly, McKenna and Shain, Catherine and Rockowitz, Shira and Rosen Sheidley, Beth and Poduri, Annapurna}
}

% 124: Rau et al., PLoS Genet 16 (2020) -- genotype-by-ancestry test for polygenic epistasis.
@article{124,
  title    = {Modeling epistasis in mice and yeast using the proportion of two or more distinct genetic backgrounds: Evidence for ``polygenic epistasis''},
  journal  = {PLoS Genet},
  volume   = {16},
  year     = {2020},
  month    = oct,
  pages    = {e1009165},
  abstract = {

BACKGROUND: The majority of quantitative genetic models used to map complex traits assume that alleles have similar effects across all individuals. Significant evidence suggests, however, that epistatic interactions modulate the impact of many alleles. Nevertheless, identifying epistatic interactions remains computationally and statistically challenging. In this work, we address some of these challenges by developing a statistical test for polygenic epistasis that determines whether the effect of an allele is altered by the global genetic ancestry proportion from distinct progenitors.

RESULTS: We applied our method to data from mice and yeast. For the mice, we observed 49 significant genotype-by-ancestry interaction associations across 14 phenotypes as well as over 1,400 Bonferroni-corrected genotype-by-ancestry interaction associations for mouse gene expression data. For the yeast, we observed 92 significant genotype-by-ancestry interactions across 38 phenotypes. Given this evidence of epistasis, we test for and observe evidence of rapid selection pressure on ancestry specific polymorphisms within one of the cohorts, consistent with epistatic selection.

CONCLUSIONS: Unlike our prior work in human populations, we observe widespread evidence of ancestry-modified SNP effects, perhaps reflecting the greater divergence present in crosses using mice and yeast.

},
  keywords = {Alleles, Animals, Epistasis, Genetic, Evolution, Molecular, Genotype, Humans, Mice, Models, Genetic, Multifactorial Inheritance, Phenotype, Quantitative Trait Loci, Saccharomyces cerevisiae, Selection, Genetic},
  issn     = {1553-7404},
  doi      = {10.1371/journal.pgen.1009165},
  author   = {Rau, Christoph D and Gonzales, Natalia M and Bloom, Joshua S and Park, Danny and Ayroles, Julien and Palmer, Abraham A and Lusis, Aldons J and Zaitlen, Noah}
}

% 76: Unlu et al., Nat Med 26 (2020) -- RIC1-linked syndrome (CATIFA) via phenome-based approach.
@article{76,
  title    = {Phenome-based approach identifies {RIC1}-linked {Mendelian} syndrome through zebrafish models, biobank associations and clinical studies},
  journal  = {Nat Med},
  volume   = {26},
  year     = {2020},
  month    = jan,
  pages    = {98--109},
  abstract = {

Discovery of genotype-phenotype relationships remains a major challenge in clinical medicine. Here, we combined three sources of phenotypic data to uncover a new mechanism for rare and common diseases resulting from collagen secretion deficits. Using a zebrafish genetic screen, we identified the ric1 gene as being essential for skeletal biology. Using a gene-based phenome-wide association study (PheWAS) in the EHR-linked BioVU biobank, we show that reduced genetically determined expression of RIC1 is associated with musculoskeletal and dental conditions. Whole-exome sequencing identified individuals homozygous-by-descent for a rare variant in RIC1 and, through a guided clinical re-evaluation, it was discovered that they share signs with the BioVU-associated phenome. We named this new Mendelian syndrome CATIFA (cleft lip, cataract, tooth abnormality, intellectual disability, facial dysmorphism, attention-deficit hyperactivity disorder) and revealed further disease mechanisms. This gene-based, PheWAS-guided approach can accelerate the discovery of clinically relevant disease phenome and associated biological mechanisms.

},
  keywords = {Abnormalities, Multiple, Animals, Behavior, Animal, Biological Specimen Banks, Chondrocytes, Disease Models, Animal, Extracellular Matrix, Fibroblasts, Guanine Nucleotide Exchange Factors, Humans, Models, Biological, Musculoskeletal System, Osteogenesis, Phenomics, Phenotype, Procollagen, Protein Transport, Secretory Pathway, Syndrome, Zebrafish, Zebrafish Proteins},
  issn     = {1546-170X},
  doi      = {10.1038/s41591-019-0705-y},
  author   = {Unlu, Gokhan and Qi, Xinzi and Gamazon, Eric R and Melville, David B and Patel, Nisha and Rushing, Amy R and Hashem, Mais and Al-Faifi, Abdullah and Chen, Rui and Li, Bingshan and Cox, Nancy J and Alkuraya, Fowzan S and Knapik, Ela W}
}

% 128: Tcheandjieu et al., PLoS Genet 16 (2020) -- PheWAS of 26 mendelian genes.
@article{128,
  title    = {A phenome-wide association study of 26 mendelian genes reveals phenotypic expressivity of common and rare variants within the general population},
  journal  = {PLoS Genet},
  volume   = {16},
  year     = {2020},
  month    = nov,
  pages    = {e1008802},
  abstract = {

The clinical evaluation of a genetic syndrome relies upon recognition of a characteristic pattern of signs or symptoms to guide targeted genetic testing for confirmation of the diagnosis. However, individuals displaying a single phenotype of a complex syndrome may not meet criteria for clinical diagnosis or genetic testing. Here, we present a phenome-wide association study (PheWAS) approach to systematically explore the phenotypic expressivity of common and rare alleles in genes associated with four well-described syndromic diseases (Alagille (AS), Marfan (MS), DiGeorge (DS), and Noonan (NS) syndromes) in the general population. Using human phenotype ontology (HPO) terms, we systematically mapped 60 phenotypes related to AS, MS, DS and NS in 337,198 unrelated white British from the UK Biobank (UKBB) based on their hospital admission records, self-administered questionnaires, and physiological measurements. We performed logistic regression adjusting for age, sex, and the first 5 genetic principal components, for each phenotype and each variant in the target genes (JAG1, NOTCH2, FBN1, PTPN11 and RAS-opathy genes, and genes in the 22q11.2 locus) and performed a gene burden test. Overall, we observed multiple phenotype-genotype correlations, such as the association between variation in JAG1, FBN1, PTPN11 and SOS2 with diastolic and systolic blood pressure; and pleiotropy among multiple variants in syndromic genes. For example, rs11066309 in PTPN11 was significantly associated with a lower body mass index, an increased risk of hypothyroidism and a smaller size for gestational age, all in concordance with NS-related phenotypes. Similarly, rs589668 in FBN1 was associated with an increase in body height and blood pressure, and a reduced body fat percentage as observed in Marfan syndrome.
Our findings suggest that the spectrum of associations of common and rare variants in genes involved in syndromic diseases can be extended to individual phenotypes within the general population.

},
  keywords = {Alagille Syndrome, Alleles, Biological Variation, Population, DiGeorge Syndrome, European Continental Ancestry Group, Female, Gene Frequency, Genetic Association Studies, Genetic Predisposition to Disease, Genetic Testing, Genetic Variation, Genome-Wide Association Study, Humans, Male, Marfan Syndrome, Noonan Syndrome, Phenotype, Polymorphism, Single Nucleotide, United Kingdom},
  issn     = {1553-7404},
  doi      = {10.1371/journal.pgen.1008802},
  author   = {Tcheandjieu, Catherine and Aguirre, Matthew and Gustafsson, Stefan and Saha, Priyanka and Potiny, Praneetha and Haendel, Melissa and Ingelsson, Erik and Rivas, Manuel A and Priest, James R}
}

% 86 -- Dahl et al., Am J Hum Genet 106 (2020), pp. 71--91.
% Source export had month "2020 01 02": split into the month macro plus a
% non-standard day field (ignored by styles that do not know it).
@article {86,
  title    = {A Robust Method Uncovers Significant Context-Specific Heritability in Diverse Complex Traits.},
  journal  = {Am J Hum Genet},
  volume   = {106},
  year     = {2020},
  month    = jan,
  day      = {2},
  pages    = {71--91},
  abstract = {

Gene-environment interactions (GxE) can be fundamental in applications ranging from functional genomics to precision medicine and are a conjectured source of substantial heritability. However, unbiased methods to profile GxE genome-wide are nascent and, as we show, cannot accommodate general environment variables, modest sample sizes, heterogeneous noise, and binary traits. To address this gap, we propose a simple, unifying mixed model for gene-environment interaction (GxEMM). In simulations and theory, we show that GxEMM can dramatically improve estimates and eliminate false positives when the assumptions of existing methods fail. We apply GxEMM to a range of human and model organism datasets and find broad evidence of context-specific genetic effects, including GxSex, GxAdversity, and GxDisease interactions across thousands of clinical and molecular phenotypes. Overall, GxEMM is broadly applicable for testing and quantifying polygenic interactions, which can be useful for explaining heritability and invaluable for determining biologically relevant environments.

},
  keywords = {Adult, Animals, Computer Simulation, Female, Gene-Environment Interaction, Genetic Markers, Genome-Wide Association Study, Humans, Male, Mental Disorders, Middle Aged, Models, Genetic, Multifactorial Inheritance, Phenomics, Phenotype, Rats},
  issn     = {1537-6605},
  doi      = {10.1016/j.ajhg.2019.11.015},
  author   = {Dahl, Andy and Nguyen, Khiem and Cai, Na and Gandal, Michael J and Flint, Jonathan and Zaitlen, Noah}
}

% 88 -- Werling et al., Cell Rep 31 (2020), article number 107489.
% {RNA} is braced so sentence-casing styles keep the acronym upper-case.
% Source export had month "2020 04 07": split into the month macro plus a
% non-standard day field (ignored by styles that do not know it).
@article {88,
  title    = {Whole-Genome and {RNA} Sequencing Reveal Variation and Transcriptomic Coordination in the Developing Human Prefrontal Cortex.},
  journal  = {Cell Rep},
  volume   = {31},
  year     = {2020},
  month    = apr,
  day      = {7},
  pages    = {107489},
  abstract = {

Gene expression levels vary across developmental stage, cell type, and region in the brain. Genomic variants also contribute to the variation in expression, and some neuropsychiatric disorder loci may exert their effects through this mechanism. To investigate these relationships, we present BrainVar, a unique resource of paired whole-genome and bulk tissue RNA sequencing from the dorsolateral prefrontal cortex of 176 individuals across prenatal and postnatal development. Here we identify common variants that alter gene expression (expression quantitative trait loci [eQTLs]) constantly across development or predominantly during prenatal or postnatal stages. Both "constant" and "temporal-predominant" eQTLs are enriched for loci associated with neuropsychiatric traits and disorders and colocalize with specific variants. Expression levels of more than 12,000 genes rise or fall in a concerted late-fetal transition, with the transitional genes enriched for cell-type-specific genes and neuropsychiatric risk loci, underscoring the importance of cataloging developmental trajectories in understanding cortical physiology and pathology.

},
  keywords = {Base Sequence, Brain, Computational Biology, Databases, Genetic, Genetic Predisposition to Disease, Genetic Variation, Genome-Wide Association Study, Genomics, Humans, Phenotype, Polymorphism, Single Nucleotide, Prefrontal Cortex, Quantitative Trait Loci, Sequence Analysis, RNA, Transcriptome, Whole Exome Sequencing, Whole Genome Sequencing},
  issn     = {2211-1247},
  doi      = {10.1016/j.celrep.2020.03.053},
  author   = {Werling, Donna M and Pochareddy, Sirisha and Choi, Jinmyung and An, Joon-Yong and Sheppard, Brooke and Peng, Minshi and Li, Zhen and Dastmalchi, Claudia and Santpere, Gabriel and Sousa, Andr{\'e} M M and Tebbenkamp, Andrew T N and Kaur, Navjot and Gulden, Forrest O and Breen, Michael S and Liang, Lindsay and Gilson, Michael C and Zhao, Xuefang and Dong, Shan and Klei, Lambertus and Cicek, A Ercument and Buxbaum, Joseph D and Adle-Biassette, Homa and Thomas, Jean-Leon and Aldinger, Kimberly A and O{\textquoteright}Day, Diana R and Glass, Ian A and Zaitlen, Noah A and Talkowski, Michael E and Roeder, Kathryn and State, Matthew W and Devlin, Bernie and Sanders, Stephan J and Sestan, Nenad}
}

% 59 -- Dahl et al., Genetics 211 (2019), pp. 1179--1189.
% month is the standard BibTeX macro (source export had "2019 04"); page range uses an en-dash (--).
@article {59,
  title    = {Adjusting for Principal Components of Molecular Phenotypes Induces Replicating False Positives.},
  journal  = {Genetics},
  volume   = {211},
  year     = {2019},
  month    = apr,
  pages    = {1179--1189},
  abstract = {

High-throughput measurements of molecular phenotypes provide an unprecedented opportunity to model cellular processes and their impact on disease. These highly structured datasets are usually strongly confounded, creating false positives and reducing power. This has motivated many approaches based on principal components analysis (PCA) to estimate and correct for confounders, which have become indispensable elements of association tests between molecular phenotypes and both genetic and nongenetic factors. Here, we show that these correction approaches induce a bias, and that it persists for large sample sizes and replicates out-of-sample. We prove this theoretically for PCA by deriving an analytic, deterministic, and intuitive bias approximation. We assess other methods with realistic simulations, which show that perturbing any of several basic parameters can cause false positive rate (FPR) inflation. Our experiments show the bias depends on covariate and confounder sparsity, effect sizes, and their correlation. Surprisingly, when the covariate and confounder have [Formula: see text], standard two-step methods all have [Formula: see text]-fold FPR inflation. Our analysis informs best practices for confounder correction in genomic studies, and suggests many false discoveries have been made and replicated in some differential expression analyses.

},
  keywords = {Animals, Genome-Wide Association Study, Humans, Models, Genetic, Phenotype, Principal Component Analysis, Quantitative Trait Loci, Reproducibility of Results},
  issn     = {1943-2631},
  doi      = {10.1534/genetics.118.301768},
  author   = {Dahl, Andy and Guillemot, Vincent and Mefford, Joel and Aschard, Hugues and Zaitlen, Noah}
}

% 55 -- Weissenkampen et al., Curr Protoc Hum Genet 101 (2019), article number e83.
% month is the standard BibTeX macro (source export had "2019 04"); pages holds the article number, not a range.
@article {55,
  title    = {Methods for the Analysis and Interpretation for Rare Variants Associated with Complex Traits.},
  journal  = {Curr Protoc Hum Genet},
  volume   = {101},
  year     = {2019},
  month    = apr,
  pages    = {e83},
  abstract = {

With the advent of Next Generation Sequencing (NGS) technologies, whole genome and whole exome DNA sequencing has become affordable for routine genetic studies. Coupled with improved genotyping arrays and genotype imputation methodologies, it is increasingly feasible to obtain rare genetic variant information in large datasets. Such datasets allow researchers to gain a more complete understanding of the genetic architecture of complex traits caused by rare variants. State-of-the-art statistical methods for the statistical genetics analysis of sequence-based association, including efficient algorithms for association analysis in biobank-scale datasets, gene-association tests, meta-analysis, fine mapping methods that integrate functional genomic dataset, and phenome-wide association studies (PheWAS), are reviewed here. These methods are expected to be highly useful for next generation statistical genetics analysis in the era of precision medicine. {\textcopyright} 2019 by John Wiley \& Sons, Inc.

},
  keywords = {Algorithms, Genetic Predisposition to Disease, Genome, Human, Genome-Wide Association Study, Genotype, High-Throughput Nucleotide Sequencing, Humans, Multifactorial Inheritance, Phenotype, Polymorphism, Single Nucleotide, Whole Exome Sequencing, Whole Genome Sequencing},
  issn     = {1934-8258},
  doi      = {10.1002/cphg.83},
  author   = {Weissenkampen, J Dylan and Jiang, Yu and Eckert, Scott and Jiang, Bibo and Li, Bingshan and Liu, Dajiang J}
}

% 37 -- Park et al., Genet Epidemiol 42 (2018), pp. 49--63.
% month is the standard BibTeX macro (source export had "2018 02"); page range uses an en-dash (--).
@article {37,
  title    = {An ancestry-based approach for detecting interactions.},
  journal  = {Genet Epidemiol},
  volume   = {42},
  year     = {2018},
  month    = feb,
  pages    = {49--63},
  abstract = {

BACKGROUND: Epistasis and gene-environment interactions are known to contribute significantly to variation of complex phenotypes in model organisms. However, their identification in human association studies remains challenging for myriad reasons. In the case of epistatic interactions, the large number of potential interacting sets of genes presents computational, multiple hypothesis correction, and other statistical power issues. In the case of gene-environment interactions, the lack of consistently measured environmental covariates in most disease studies precludes searching for interactions and creates difficulties for replicating studies.

RESULTS: In this work, we develop a new statistical approach to address these issues that leverages genetic ancestry, defined as the proportion of ancestry derived from each ancestral population (e.g., the fraction of European/African ancestry in African Americans), in admixed populations. We applied our method to gene expression and methylation data from African American and Latino admixed individuals, respectively, identifying nine interactions that were significant at $P<5\times10^{-8}$. We show that two of the interactions in methylation data replicate, and the remaining six are significantly enriched for low P-values ($P<1.8\times10^{-6}$).

CONCLUSION: We show that genetic ancestry can be a useful proxy for unknown and unmeasured covariates in the search for interaction effects. These results have important implications for our understanding of the genetic architecture of complex traits.

},
  keywords = {African Americans, African Continental Ancestry Group, DNA Methylation, Epistasis, Genetic, European Continental Ancestry Group, Gene-Environment Interaction, Hispanic Americans, Humans, Models, Genetic, Phenotype},
  issn     = {1098-2272},
  doi      = {10.1002/gepi.22087},
  author   = {Park, Danny S and Eskin, Itamar and Kang, Eun Yong and Gamazon, Eric R and Eng, Celeste and Gignoux, Christopher R and Galanter, Joshua M and Burchard, Esteban and Ye, Chun J and Aschard, Hugues and Eskin, Eleazar and Halperin, Eran and Zaitlen, Noah}
}

% 28 -- DeBoever et al., Nat Commun 9 (2018), article number 1612.
% {UK} and {Biobank} are braced so sentence-casing styles keep the proper name capitalised.
% Source export had month "2018 04 24": split into the month macro plus a
% non-standard day field (ignored by styles that do not know it).
@article {28,
  title    = {Medical relevance of protein-truncating variants across 337,205 individuals in the {UK} {Biobank} study.},
  journal  = {Nat Commun},
  volume   = {9},
  year     = {2018},
  month    = apr,
  day      = {24},
  pages    = {1612},
  abstract = {

Protein-truncating variants can have profound effects on gene function and are critical for clinical genome interpretation and generating therapeutic hypotheses, but their relevance to medical phenotypes has not been systematically assessed. Here, we characterize the effect of 18,228 protein-truncating variants across 135 phenotypes from the UK Biobank and find 27 associations between medical phenotypes and protein-truncating variants in genes outside the major histocompatibility complex. We perform phenome-wide analyses and directly measure the effect in homozygous carriers, commonly referred to as "human knockouts," across medical phenotypes for genes implicated as being protective against disease or associated with at least one phenotype in our study. We find several genes with strong pleiotropic or non-additive effects. Our results illustrate the importance of protein-truncating variants in a variety of diseases.

},
  keywords = {Databases, Nucleic Acid, Genome-Wide Association Study, Humans, Phenotype, Proteins, Sequence Deletion, United Kingdom},
  issn     = {2041-1723},
  doi      = {10.1038/s41467-018-03910-9},
  author   = {DeBoever, Christopher and Tanigawa, Yosuke and Lindholm, Malene E and McInnes, Greg and Lavertu, Adam and Ingelsson, Erik and Chang, Chris and Ashley, Euan A and Bustamante, Carlos D and Daly, Mark J and Rivas, Manuel A}
}

% 12 -- Liu and Lin, Biometrics 74 (2018), pp. 165--175.
% month is the standard BibTeX macro (source export had "2018 Mar"); page range uses an en-dash (--).
@article {12,
  title    = {Multiple phenotype association tests using summary statistics in genome-wide association studies.},
  journal  = {Biometrics},
  volume   = {74},
  year     = {2018},
  month    = mar,
  pages    = {165--175},
  abstract = {

We study in this article jointly testing the associations of a genetic variant with correlated multiple phenotypes using the summary statistics of individual phenotype analysis from Genome-Wide Association Studies (GWASs). We estimated the between-phenotype correlation matrix using the summary statistics of individual phenotype GWAS analyses, and developed genetic association tests for multiple phenotypes by accounting for between-phenotype correlation without the need to access individual-level data. Since genetic variants often affect multiple phenotypes differently across the genome and the between-phenotype correlation can be arbitrary, we proposed robust and powerful multiple phenotype testing procedures by jointly testing a common mean and a variance component in linear mixed models for summary statistics. We computed the p-values of the proposed tests analytically. This computational advantage makes our methods practically appealing in large-scale GWASs. We performed simulation studies to show that the proposed tests maintained correct type I error rates, and to compare their powers in various settings with the existing methods. We applied the proposed tests to a GWAS Global Lipids Genetics Consortium summary statistics data set and identified additional genetic variants that were missed by the original single-trait analysis.

},
  keywords = {Analysis of Variance, Computer Simulation, Genome-Wide Association Study, Humans, Linear Models, Lipids, Models, Genetic, Phenotype},
  issn     = {1541-0420},
  doi      = {10.1111/biom.12735},
  author   = {Liu, Zhonghua and Lin, Xihong}
}

% 29 -- Bastarache et al., Science 359 (2018), pp. 1233--1239.
% {Mendelian} is braced so sentence-casing styles keep the capital.
% Source export had month "2018 03 16": split into the month macro plus a
% non-standard day field (ignored by styles that do not know it).
@article {29,
  title    = {Phenotype risk scores identify patients with unrecognized {Mendelian} disease patterns.},
  journal  = {Science},
  volume   = {359},
  year     = {2018},
  month    = mar,
  day      = {16},
  pages    = {1233--1239},
  abstract = {

Genetic association studies often examine features independently, potentially missing subpopulations with multiple phenotypes that share a single cause. We describe an approach that aggregates phenotypes on the basis of patterns described by Mendelian diseases. We mapped the clinical features of 1204 Mendelian diseases into phenotypes captured from the electronic health record (EHR) and summarized this evidence as phenotype risk scores (PheRSs). In an initial validation, PheRS distinguished cases and controls of five Mendelian diseases. Applying PheRS to 21,701 genotyped individuals uncovered 18 associations between rare variants and phenotypes consistent with Mendelian diseases. In 16 patients, the rare genetic variants were associated with severe outcomes such as organ transplants. PheRS can augment rare-variant interpretation and may identify subsets of patients with distinct genetic causes for common diseases.

},
  keywords = {Databases, Genetic, DNA Mutational Analysis, Electronic Health Records, Exome, Genetic Association Studies, Genetic Diseases, Inborn, Genetic Predisposition to Disease, Genetic Variation, Humans, Phenotype, Risk Factors},
  issn     = {1095-9203},
  doi      = {10.1126/science.aal4043},
  author   = {Bastarache, Lisa and Hughey, Jacob J and Hebbring, Scott and Marlo, Joy and Zhao, Wanke and Ho, Wanting T and Van Driest, Sara L and McGregor, Tracy L and Mosley, Jonathan D and Wells, Quinn S and Temple, Michael and Ramirez, Andrea H and Carroll, Robert and Osterman, Travis and Edwards, Todd and Ruderfer, Douglas and Velez Edwards, Digna R and Hamid, Rizwan and Cogan, Joy and Glazer, Andrew and Wei, Wei-Qi and Feng, QiPing and Brilliant, Murray and Zhao, Zhizhuang J and Cox, Nancy J and Roden, Dan M and Denny, Joshua C}
}

% 21 -- Aschard et al., Nat Genet 49 (2017), pp. 1789--1795.
% month is the standard BibTeX macro (source export had "2017 Dec"); page range uses an en-dash (--).
@article {21,
  title    = {Covariate selection for association screening in multiphenotype genetic studies.},
  journal  = {Nat Genet},
  volume   = {49},
  year     = {2017},
  month    = dec,
  pages    = {1789--1795},
  abstract = {

Testing for associations in big data faces the problem of multiple comparisons, wherein true signals are difficult to detect on the background of all associations queried. This difficulty is particularly salient in human genetic association studies, in which phenotypic variation is often driven by numerous variants of small effect. The current strategy to improve power to identify these weak associations consists of applying standard marginal statistical approaches and increasing study sample sizes. Although successful, this approach does not leverage the environmental and genetic factors shared among the multiple phenotypes collected in contemporary cohorts. Here we developed covariates for multiphenotype studies (CMS), an approach that improves power when correlated phenotypes are measured on the same samples. Our analyses of real and simulated data provide direct evidence that correlated phenotypes can be used to achieve increases in power to levels often surpassing the power gained by a twofold increase in sample size.

},
  keywords = {Algorithms, Genetic Association Studies, Genetic Variation, Genome-Wide Association Study, Genotype, Humans, Models, Genetic, Multivariate Analysis, Phenotype, Reproducibility of Results, Sample Size},
  issn     = {1546-1718},
  doi      = {10.1038/ng.3975},
  author   = {Aschard, Hugues and Guillemot, Vincent and Vilhjalmsson, Bjarni and Patel, Chirag J and Skurnik, David and Ye, Chun J and Wolpin, Brian and Kraft, Peter and Zaitlen, Noah}
}

% 47 -- Sofer et al., Biometrics 73 (2017), pp. 1210--1220.
% {SNP} is braced so sentence-casing styles keep the acronym upper-case.
% month is the standard BibTeX macro (source export had "2017 12"); page range uses an en-dash (--).
@article {47,
  title    = {Weighted pseudolikelihood for {SNP} set analysis with multiple secondary outcomes in case-control genetic association studies.},
  journal  = {Biometrics},
  volume   = {73},
  year     = {2017},
  month    = dec,
  pages    = {1210--1220},
  abstract = {

We propose a weighted pseudolikelihood method for analyzing the association of a SNP set, for example, SNPs in a gene or a genetic pathway or network, with multiple secondary phenotypes in case-control genetic association studies. To boost analysis power, we assume that the SNP-specific effects are shared across all secondary phenotypes using a scaled mean model. We estimate regression parameters using Inverse Probability Weighted (IPW) estimating equations obtained from the weighted pseudolikelihood, which accounts for case-control sampling to prevent potential ascertainment bias. To test the effect of a SNP set, we propose a weighted variance component pseudo-score test. We also propose a penalized IPW pseudolikelihood method for selecting a subset of SNPs that are associated with the multiple secondary phenotypes. We show that the proposed variable selection procedure has the oracle properties and is robust to misspecification of the correlation structure among secondary phenotypes. We select the tuning parameter using a weighted Bayesian Information-like Criterion (wBIC). We evaluate the finite sample performance of the proposed methods via simulations, and illustrate the methods by the analysis of the multiple secondary smoking behavior outcomes in a lung cancer case-control genetic association study.

},
  keywords = {Case-Control Studies, Computer Simulation, Genetic Association Studies, Humans, Likelihood Functions, Lung Neoplasms, Phenotype, Polymorphism, Single Nucleotide, Smoking},
  issn     = {1541-0420},
  doi      = {10.1111/biom.12680},
  author   = {Sofer, Tamar and Schifano, Elizabeth D and Christiani, David C and Lin, Xihong}
}