% Journal article record (citation key 51), cleaned up from an automated export.
% - month uses the standard three-letter macro; the day of the original
%   "2019 03 01" value is kept in the separate day field.
% - pages holds a single number (998), presumably the article number -- confirm.
% - keywords are kept exactly as exported; several terms contain commas
%   themselves (e.g. "Genome, Human"), so do not split naively on commas.
@article{51,
  author   = {Luo, Ruibang and Sedlazeck, Fritz J. and Lam, Tak-Wah and Schatz, Michael C.},
  title    = {A multi-task convolutional deep neural network for variant calling in single molecule sequencing},
  journal  = {Nature Communications},
  volume   = {10},
  pages    = {998},
  year     = {2019},
  month    = mar,
  day      = {1},
  issn     = {2041-1723},
  doi      = {10.1038/s41467-019-09025-z},
  keywords = {Base Sequence, Computational Biology, DNA Mutational Analysis, Genome, Human, Genome-Wide Association Study, Genomics, Genotype, Genotyping Techniques, Humans, INDEL Mutation, Nanopores, Neural Networks, Computer, Polymorphism, Single Nucleotide, Sequence Analysis, DNA, Software},
  abstract = {The accurate identification of DNA sequence variants is an important, but challenging task in genomics. It is particularly difficult for single molecule sequencing, which has a per-nucleotide error rate of $\sim$5--15\%. Meeting this demand, we developed Clairvoyante, a multi-task five-layer convolutional neural network model for predicting variant type (SNP or indel), zygosity, alternative allele and indel length from aligned reads. For the well-characterized NA12878 human sample, Clairvoyante achieves 99.67, 95.78, 90.53\% F1-score on 1KP common variants, and 98.65, 92.57, 87.26\% F1-score for whole-genome analysis, using Illumina, PacBio, and Oxford Nanopore data, respectively. Training on a second human sample shows Clairvoyante is sample agnostic and finds variants in less than 2 h on a standard server. Furthermore, we present 3,135 variants that are missed using Illumina but supported independently by both PacBio and Oxford Nanopore reads. Clairvoyante is available open-source (https://github.com/aquaskyline/Clairvoyante), with modules to train, utilize and visualize the model.},
}