% Bibliography database: eight @article entries, keyed by numeric record IDs
% (144, 128, 111, 50, 39, 29, 21, 47). Keys are kept as-is so existing \cite
% commands continue to resolve.
%
% Conventions used below:
%   - month uses the predefined three-letter macros (unquoted); where the
%     export carried a day of month it is kept in a separate `day` field.
%   - page ranges use an en dash (--).
%   - titles carry no trailing period (styles add punctuation) and protect
%     proper nouns / acronyms with braces around the whole word.
%   - journal names are the abbreviated forms found in the export.
%   - keywords are comma-separated as exported; note that some terms
%     themselves contain commas (e.g. "Polymorphism, Single Nucleotide").

@article{144,
  author   = {Sakaue, Saori and Kanai, Masahiro and Tanigawa, Yosuke and Karjalainen, Juha and Kurki, Mitja and Koshiba, Seizo and Narita, Akira and Konuma, Takahiro and Yamamoto, Kenichi and Akiyama, Masato and Ishigaki, Kazuyoshi and Suzuki, Akari and Suzuki, Ken and Obara, Wataru and Yamaji, Ken and Takahashi, Kazuhisa and Asai, Satoshi and Takahashi, Yasuo and Suzuki, Takao and Shinozaki, Nobuaki and Yamaguchi, Hiroki and Minami, Shiro and Murayama, Shigeo and Yoshimori, Kozo and Nagayama, Satoshi and Obata, Daisuke and Higashiyama, Masahiko and Masumoto, Akihide and Koretsune, Yukihiro and Ito, Kaoru and Terao, Chikashi and Yamauchi, Toshimasa and Komuro, Issei and Kadowaki, Takashi and Tamiya, Gen and Yamamoto, Masayuki and Nakamura, Yusuke and Kubo, Michiaki and Murakami, Yoshinori and Yamamoto, Kazuhiko and Kamatani, Yoichiro and Palotie, Aarno and Rivas, Manuel A and Daly, Mark J and Matsuda, Koichi and Okada, Yukinori},
  title    = {A cross-population atlas of genetic associations for 220 human phenotypes},
  journal  = {Nat Genet},
  volume   = {53},
  year     = {2021},
  month    = oct,
  pages    = {1415--1424},
  issn     = {1546-1718},
  doi      = {10.1038/s41588-021-00931-x},
  keywords = {ABO Blood-Group System, Biological Specimen Banks, Genetic Association Studies, Genetic Loci, Genetic Pleiotropy, Genetic Predisposition to Disease, Genome-Wide Association Study, Humans, Major Histocompatibility Complex, Meta-Analysis as Topic, Mutation, Phenotype},
  abstract = {Current genome-wide association studies do not yet capture sufficient diversity in populations and scope of phenotypes. To expand an atlas of genetic associations in non-European populations, we conducted 220 deep-phenotype genome-wide association studies (diseases, biomarkers and medication usage) in BioBank Japan (n = 179,000), by incorporating past medical history and text-mining of electronic medical records. Meta-analyses with the UK Biobank and FinnGen (n = 628,000) identified $\sim$5,000 new loci, which improved the resolution of the genomic map of human traits. This atlas elucidated the landscape of pleiotropy as represented by the major histocompatibility complex locus, where we conducted HLA fine-mapping. Finally, we performed statistical decomposition of matrices of phenome-wide summary statistics, and identified latent genetic components, which pinpointed responsible variants and biological mechanisms underlying current disease classifications across populations. The decomposed components enabled genetically informed subtyping of similar diseases (for example, allergic diseases). Our study suggests a potential avenue for hypothesis-free re-investigation of human diseases through genetics.},
}

@article{128,
  author   = {Tcheandjieu, Catherine and Aguirre, Matthew and Gustafsson, Stefan and Saha, Priyanka and Potiny, Praneetha and Haendel, Melissa and Ingelsson, Erik and Rivas, Manuel A and Priest, James R},
  title    = {A phenome-wide association study of 26 mendelian genes reveals phenotypic expressivity of common and rare variants within the general population},
  journal  = {PLoS Genet},
  volume   = {16},
  year     = {2020},
  month    = nov,
  pages    = {e1008802},
  issn     = {1553-7404},
  doi      = {10.1371/journal.pgen.1008802},
  keywords = {Alagille Syndrome, Alleles, Biological Variation, Population, DiGeorge Syndrome, European Continental Ancestry Group, Female, Gene Frequency, Genetic Association Studies, Genetic Predisposition to Disease, Genetic Testing, Genetic Variation, Genome-Wide Association Study, Humans, Male, Marfan Syndrome, Noonan Syndrome, Phenotype, Polymorphism, Single Nucleotide, United Kingdom},
  abstract = {The clinical evaluation of a genetic syndrome relies upon recognition of a characteristic pattern of signs or symptoms to guide targeted genetic testing for confirmation of the diagnosis. However, individuals displaying a single phenotype of a complex syndrome may not meet criteria for clinical diagnosis or genetic testing. Here, we present a phenome-wide association study (PheWAS) approach to systematically explore the phenotypic expressivity of common and rare alleles in genes associated with four well-described syndromic diseases (Alagille (AS), Marfan (MS), DiGeorge (DS), and Noonan (NS) syndromes) in the general population. Using human phenotype ontology (HPO) terms, we systematically mapped 60 phenotypes related to AS, MS, DS and NS in 337,198 unrelated white British from the UK Biobank (UKBB) based on their hospital admission records, self-administrated questionnaires, and physiological measurements. We performed logistic regression adjusting for age, sex, and the first 5 genetic principal components, for each phenotype and each variant in the target genes (JAG1, NOTCH2, FBN1, PTPN11 and RAS-opathy genes, and genes in the 22q11.2 locus) and performed a gene burden test. Overall, we observed multiple phenotype-genotype correlations, such as the association between variation in JAG1, FBN1, PTPN11 and SOS2 with diastolic and systolic blood pressure; and pleiotropy among multiple variants in syndromic genes. For example, rs11066309 in PTPN11 was significantly associated with a lower body mass index, an increased risk of hypothyroidism and a smaller size for gestational age, all in concordance with NS-related phenotypes. Similarly, rs589668 in FBN1 was associated with an increase in body height and blood pressure, and a reduced body fat percentage as observed in Marfan syndrome. Our findings suggest that the spectrum of associations of common and rare variants in genes involved in syndromic diseases can be extended to individual phenotypes within the general population.},
}

@article{111,
  author   = {Zhou, Dan and Jiang, Yi and Zhong, Xue and Cox, Nancy J and Liu, Chunyu and Gamazon, Eric R},
  title    = {A unified framework for joint-tissue transcriptome-wide association and {Mendelian} randomization analysis},
  journal  = {Nat Genet},
  volume   = {52},
  year     = {2020},
  month    = nov,
  pages    = {1239--1246},
  issn     = {1546-1718},
  doi      = {10.1038/s41588-020-0706-2},
  keywords = {Animals, Gene Expression Profiling, Genetic Association Studies, Humans, Lipoproteins, LDL, Mendelian Randomization Analysis, Mice, Models, Genetic, Multifactorial Inheritance, Predictive Value of Tests},
  abstract = {Here, we present a joint-tissue imputation (JTI) approach and a Mendelian randomization framework for causal inference, MR-JTI. JTI borrows information across transcriptomes of different tissues, leveraging shared genetic regulation, to improve prediction performance in a tissue-dependent manner. Notably, JTI includes the single-tissue imputation method PrediXcan as a special case and outperforms other single-tissue approaches (the Bayesian sparse linear mixed model and Dirichlet process regression). MR-JTI models variant-level heterogeneity (primarily due to horizontal pleiotropy, addressing a major challenge of transcriptome-wide association study interpretation) and performs causal inference with type I error control. We make explicit the connection between the genetic architecture of gene expression and of complex traits and the suitability of Mendelian randomization as a causal inference strategy for transcriptome-wide association studies. We provide a resource of imputation models generated from GTEx and PsychENCODE panels. Analysis of biobanks and meta-analysis data, and extensive simulations show substantially improved statistical power, replication and causal mapping rate for JTI relative to existing approaches.},
}

@article{50,
  author   = {Chen, Han and Huffman, Jennifer E and Brody, Jennifer A and Wang, Chaolong and Lee, Seunggeun and Li, Zilin and Gogarten, Stephanie M and Sofer, Tamar and Bielak, Lawrence F and Bis, Joshua C and Blangero, John and Bowler, Russell P and Cade, Brian E and Cho, Michael H and Correa, Adolfo and Curran, Joanne E and de Vries, Paul S and Glahn, David C and Guo, Xiuqing and Johnson, Andrew D and Kardia, Sharon and Kooperberg, Charles and Lewis, Joshua P and Liu, Xiaoming and Mathias, Rasika A and Mitchell, Braxton D and O'Connell, Jeffrey R and Peyser, Patricia A and Post, Wendy S and Reiner, Alex P and Rich, Stephen S and Rotter, Jerome I and Silverman, Edwin K and Smith, Jennifer A and Vasan, Ramachandran S and Wilson, James G and Yanek, Lisa R and Redline, Susan and Smith, Nicholas L and Boerwinkle, Eric and Borecki, Ingrid B and Cupples, L Adrienne and Laurie, Cathy C and Morrison, Alanna C and Rice, Kenneth M and Lin, Xihong},
  title    = {Efficient Variant Set Mixed Model Association Tests for Continuous and Binary Traits in Large-Scale Whole-Genome Sequencing Studies},
  journal  = {Am J Hum Genet},
  volume   = {104},
  year     = {2019},
  month    = feb,
  day      = {7},
  pages    = {260--274},
  issn     = {1537-6605},
  doi      = {10.1016/j.ajhg.2018.12.012},
  keywords = {Chromosomes, Human, Pair 4, Cloud Computing, Female, Fibrinogen, Genetic Association Studies, Genetics, Population, Humans, Male, Models, Genetic, National Heart, Lung, and Blood Institute (U.S.), Precision Medicine, Research Design, Time Factors, United States, Whole Genome Sequencing},
  abstract = {With advances in whole-genome sequencing (WGS) technology, more advanced statistical methods for testing genetic association with rare variants are being developed. Methods in which variants are grouped for analysis are also known as variant-set, gene-based, and aggregate unit tests. The burden test and sequence kernel association test (SKAT) are two widely used variant-set tests, which were originally developed for samples of unrelated individuals and later have been extended to family data with known pedigree structures. However, computationally efficient and powerful variant-set tests are needed to make analyses tractable in large-scale WGS studies with complex study samples. In this paper, we propose the variant-set mixed model association tests (SMMAT) for continuous and binary traits using the generalized linear mixed model framework. These tests can be applied to large-scale WGS studies involving samples with population structure and relatedness, such as in the National Heart, Lung, and Blood Institute's Trans-Omics for Precision Medicine (TOPMed) program. SMMATs share the same null model for different variant sets, and a virtue of this null model, which includes covariates only, is that it needs to be fit only once for all tests in each genome-wide analysis. Simulation studies show that all the proposed SMMATs correctly control type I error rates for both continuous and binary traits in the presence of population structure and relatedness. We also illustrate our tests in a real data example of analysis of plasma fibrinogen levels in the TOPMed program (n = 23,763), using the Analysis Commons, a cloud-based computing platform.},
}

@article{39,
  author   = {Wojcik, Genevieve L and Fuchsberger, Christian and Taliun, Daniel and Welch, Ryan and Martin, Alicia R and Shringarpure, Suyash and Carlson, Christopher S and Abecasis, Goncalo and Kang, Hyun Min and Boehnke, Michael and Bustamante, Carlos D and Gignoux, Christopher R and Kenny, Eimear E},
  title    = {Imputation-Aware Tag {SNP} Selection To Improve Power for Large-Scale, Multi-ethnic Association Studies},
  journal  = {G3 (Bethesda)},
  volume   = {8},
  year     = {2018},
  month    = oct,
  day      = {3},
  pages    = {3255--3267},
  issn     = {2160-1836},
  doi      = {10.1534/g3.118.200502},
  keywords = {Computational Biology, Databases, Nucleic Acid, Ethnic Groups, Genetic Association Studies, Genetics, Population, Genome-Wide Association Study, Humans, Linkage Disequilibrium, Models, Genetic, Polymorphism, Single Nucleotide, Reproducibility of Results, Selection, Genetic},
  abstract = {The emergence of very large cohorts in genomic research has facilitated a focus on genotype-imputation strategies to power rare variant association. These strategies have benefited from improvements in imputation methods and association tests, however little attention has been paid to ways in which array design can increase rare variant association power. Therefore, we developed a novel framework to select tag SNPs using the reference panel of 26 populations from Phase 3 of the 1000 Genomes Project. We evaluate tag SNP performance via mean imputed $r^2$ at untyped sites using leave-one-out internal validation and standard imputation methods, rather than pairwise linkage disequilibrium. Moving beyond pairwise metrics allows us to account for haplotype diversity across the genome for improved imputation accuracy and demonstrates population-specific biases from pairwise estimates. We also examine array design strategies that contrast multi-ethnic cohorts vs. single populations, and show a boost in performance for the former can be obtained by prioritizing tag SNPs that contribute information across multiple populations simultaneously. Using our framework, we demonstrate increased imputation accuracy for rare variants (frequency $<$ 1\%) by 0.5--3.1\% for an array of one million sites and 0.7--7.1\% for an array of 500,000 sites, depending on the population. Finally, we show how recent explosive growth in non-African populations means tag SNPs capture on average 30\% fewer other variants than in African populations. The unified framework presented here will enable investigators to make informed decisions for the design of new arrays, and help empower the next phase of rare variant association for global health.},
}

@article{29,
  author   = {Bastarache, Lisa and Hughey, Jacob J and Hebbring, Scott and Marlo, Joy and Zhao, Wanke and Ho, Wanting T and Van Driest, Sara L and McGregor, Tracy L and Mosley, Jonathan D and Wells, Quinn S and Temple, Michael and Ramirez, Andrea H and Carroll, Robert and Osterman, Travis and Edwards, Todd and Ruderfer, Douglas and Velez Edwards, Digna R and Hamid, Rizwan and Cogan, Joy and Glazer, Andrew and Wei, Wei-Qi and Feng, QiPing and Brilliant, Murray and Zhao, Zhizhuang J and Cox, Nancy J and Roden, Dan M and Denny, Joshua C},
  title    = {Phenotype risk scores identify patients with unrecognized {Mendelian} disease patterns},
  journal  = {Science},
  volume   = {359},
  year     = {2018},
  month    = mar,
  day      = {16},
  pages    = {1233--1239},
  issn     = {1095-9203},
  doi      = {10.1126/science.aal4043},
  keywords = {Databases, Genetic, DNA Mutational Analysis, Electronic Health Records, Exome, Genetic Association Studies, Genetic Diseases, Inborn, Genetic Predisposition to Disease, Genetic Variation, Humans, Phenotype, Risk Factors},
  abstract = {Genetic association studies often examine features independently, potentially missing subpopulations with multiple phenotypes that share a single cause. We describe an approach that aggregates phenotypes on the basis of patterns described by Mendelian diseases. We mapped the clinical features of 1204 Mendelian diseases into phenotypes captured from the electronic health record (EHR) and summarized this evidence as phenotype risk scores (PheRSs). In an initial validation, PheRS distinguished cases and controls of five Mendelian diseases. Applying PheRS to 21,701 genotyped individuals uncovered 18 associations between rare variants and phenotypes consistent with Mendelian diseases. In 16 patients, the rare genetic variants were associated with severe outcomes such as organ transplants. PheRS can augment rare-variant interpretation and may identify subsets of patients with distinct genetic causes for common diseases.},
}

@article{21,
  author   = {Aschard, Hugues and Guillemot, Vincent and Vilhjalmsson, Bjarni and Patel, Chirag J and Skurnik, David and Ye, Chun J and Wolpin, Brian and Kraft, Peter and Zaitlen, Noah},
  title    = {Covariate selection for association screening in multiphenotype genetic studies},
  journal  = {Nat Genet},
  volume   = {49},
  year     = {2017},
  month    = dec,
  pages    = {1789--1795},
  issn     = {1546-1718},
  doi      = {10.1038/ng.3975},
  keywords = {Algorithms, Genetic Association Studies, Genetic Variation, Genome-Wide Association Study, Genotype, Humans, Models, Genetic, Multivariate Analysis, Phenotype, Reproducibility of Results, Sample Size},
  abstract = {Testing for associations in big data faces the problem of multiple comparisons, wherein true signals are difficult to detect on the background of all associations queried. This difficulty is particularly salient in human genetic association studies, in which phenotypic variation is often driven by numerous variants of small effect. The current strategy to improve power to identify these weak associations consists of applying standard marginal statistical approaches and increasing study sample sizes. Although successful, this approach does not leverage the environmental and genetic factors shared among the multiple phenotypes collected in contemporary cohorts. Here we developed covariates for multiphenotype studies (CMS), an approach that improves power when correlated phenotypes are measured on the same samples. Our analyses of real and simulated data provide direct evidence that correlated phenotypes can be used to achieve increases in power to levels often surpassing the power gained by a twofold increase in sample size.},
}

@article{47,
  author   = {Sofer, Tamar and Schifano, Elizabeth D and Christiani, David C and Lin, Xihong},
  title    = {Weighted pseudolikelihood for {SNP} set analysis with multiple secondary outcomes in case-control genetic association studies},
  journal  = {Biometrics},
  volume   = {73},
  year     = {2017},
  month    = dec,
  pages    = {1210--1220},
  issn     = {1541-0420},
  doi      = {10.1111/biom.12680},
  keywords = {Case-Control Studies, Computer Simulation, Genetic Association Studies, Humans, Likelihood Functions, Lung Neoplasms, Phenotype, Polymorphism, Single Nucleotide, Smoking},
  abstract = {We propose a weighted pseudolikelihood method for analyzing the association of a SNP set, for example, SNPs in a gene or a genetic pathway or network, with multiple secondary phenotypes in case-control genetic association studies. To boost analysis power, we assume that the SNP-specific effects are shared across all secondary phenotypes using a scaled mean model. We estimate regression parameters using Inverse Probability Weighted (IPW) estimating equations obtained from the weighted pseudolikelihood, which accounts for case-control sampling to prevent potential ascertainment bias. To test the effect of a SNP set, we propose a weighted variance component pseudo-score test. We also propose a penalized IPW pseudolikelihood method for selecting a subset of SNPs that are associated with the multiple secondary phenotypes. We show that the proposed variable selection procedure has the oracle properties and is robust to misspecification of the correlation structure among secondary phenotypes. We select the tuning parameter using a weighted Bayesian Information-like Criterion (wBIC). We evaluate the finite sample performance of the proposed methods via simulations, and illustrate the methods by the analysis of the multiple secondary smoking behavior outcomes in a lung cancer case-control genetic association study.},
}