% Three journal-article records. Citation keys (81, 70, 11) are kept unchanged
% so that existing \cite commands keep resolving.
% Conventions used below: one field per line, month as a bare three-letter
% macro, page ranges with a double hyphen, no trailing period in titles
% (the bibliography style supplies its own punctuation).

@article{81,
  author   = {Dietlein, Felix and Weghorn, Donate and Taylor-Weiner, Amaro and Richters, Andr{\'e} and Reardon, Brendan and Liu, David and Lander, Eric S. and Van Allen, Eliezer M. and Sunyaev, Shamil R.},
  title    = {Identification of cancer driver genes based on nucleotide context},
  journal  = {Nature Genetics},
  year     = {2020},
  month    = feb,
  volume   = {52},
  pages    = {208--218},
  issn     = {1546-1718},
  doi      = {10.1038/s41588-019-0572-y},
  keywords = {Cluster Analysis, Computational Biology, Humans, Mutation, Neoplasms, Nucleotides, Proteins, Whole Exome Sequencing},
  abstract = {Cancer genomes contain large numbers of somatic mutations but few of these mutations drive tumor development. Current approaches either identify driver genes on the basis of mutational recurrence or approximate the functional consequences of nonsynonymous mutations by using bioinformatic scores. Passenger mutations are enriched in characteristic nucleotide contexts, whereas driver mutations occur in functional positions, which are not necessarily surrounded by a particular nucleotide context. We observed that mutations in contexts that deviate from the characteristic contexts around passenger mutations provide a signal in favor of driver genes. We therefore developed a method that combines this feature with the signals traditionally used for driver-gene identification. We applied our method to whole-exome sequencing data from 11,873 tumor-normal pairs and identified 460 driver genes that clustered into 21 cancer-related pathways. Our study provides a resource of driver genes across 28 tumor types with additional driver genes identified according to mutations in unusual nucleotide contexts.},
}

% Entry 70: the original month value was "2019 08 01" (i.e. 2019-08-01).
% The day is recorded here only, because standard BibTeX styles read just
% the year and month fields.
@article{70,
  author   = {Weghorn, Donate and Balick, Daniel J. and Cassa, Christopher and Kosmicki, Jack A. and Daly, Mark J. and Beier, David R. and Sunyaev, Shamil R.},
  title    = {Applicability of the Mutation-Selection Balance Model to Population Genetics of Heterozygous Protein-Truncating Variants in Humans},
  journal  = {Molecular Biology and Evolution},
  year     = {2019},
  month    = aug,
  volume   = {36},
  pages    = {1701--1710},
  issn     = {1537-1719},
  doi      = {10.1093/molbev/msz092},
  abstract = {The fate of alleles in the human population is believed to be highly affected by the stochastic force of genetic drift. Estimation of the strength of natural selection in humans generally necessitates a careful modeling of drift including complex effects of the population history and structure. Protein-truncating variants (PTVs) are expected to evolve under strong purifying selection and to have a relatively high per-gene mutation rate. Thus, it is appealing to model the population genetics of PTVs under a simple deterministic mutation-selection balance, as has been proposed earlier (Cassa et~al. 2017). Here, we investigated the limits of this approximation using both computer simulations and data-driven approaches. Our simulations rely on a model of demographic history estimated from 33,370 individual exomes of the Non-Finnish European subset of the ExAC data set (Lek et~al. 2016). Additionally, we compared the African and European subset of the ExAC study and analyzed de novo PTVs. We show that the mutation-selection balance model is applicable to the majority of human genes, but not to genes under the weakest selection.},
}

@article{11,
  author   = {Cassa, Christopher A. and Weghorn, Donate and Balick, Daniel J. and Jordan, Daniel M. and Nusinow, David and Samocha, Kaitlin E. and O'Donnell-Luria, Anne and MacArthur, Daniel G. and Daly, Mark J. and Beier, David R. and Sunyaev, Shamil R.},
  title    = {Estimating the selective effects of heterozygous protein-truncating variants from human exome data},
  journal  = {Nature Genetics},
  year     = {2017},
  month    = may,
  volume   = {49},
  pages    = {806--810},
  issn     = {1546-1718},
  doi      = {10.1038/ng.3831},
  keywords = {Algorithms, Animals, Bayes Theorem, Exome, Gene Frequency, Genetic Predisposition to Disease, Genetic Variation, Genome-Wide Association Study, Genotype, Heterozygote, Humans, Mice, Knockout, Models, Genetic, Mutation, Selection, Genetic, Sequence Analysis, DNA},
  abstract = {The evolutionary cost of gene loss is a central question in genetics and has been investigated in model organisms and human cell lines. In humans, tolerance of the loss of one or both functional copies of a gene is related to the gene's causal role in disease. However, estimates of the selection and dominance coefficients in humans have been elusive. Here we analyze exome sequence data from 60,706 individuals to make genome-wide estimates of selection against heterozygous loss of gene function. Using this distribution of selection coefficients for heterozygous protein-truncating variants (PTVs), we provide corresponding Bayesian estimates for individual genes. We find that genes under the strongest selection are enriched in embryonic lethal mouse knockouts, Mendelian disease-associated genes, and regulators of transcription. Screening by essentiality, we find a large set of genes under strong selection that are likely to have crucial functions but have not yet been thoroughly characterized.},
}