@Article{Will:etal:_infer_non_codin_rna_famil:PLOS2007, author = {Sebastian Will and Kristin Reiche and Ivo L. Hofacker and Peter F. Stadler and Rolf Backofen}, title = {Inferring Non-Coding {RNA} Families and Classes by Means of Genome-Scale Structure-Based Clustering}, journal = {PLoS Comput Biol}, year = 2007, volume = {3}, number = {4}, pages = {e65}, issn = {1553-7358}, issn = {1553-734X}, pmid = {17432929}, doi = {10.1371/journal.pcbi.0030065}, user = {will}, abstract = {The RFAM database defines families of ncRNAs by means of sequence similarities that are sufficientto establish homology. In some cases, such as microRNAs, box H/ACA snoRNAs, functional commonalities define classes of RNAs that are characterized by structural similarities, and typically consist ofmultiple RNA families. Recent advances in high-throughput transcriptomics and comparative genomics have produced very large sets of putative non-coding RNAs and regulatory RNA signals. For many ofthem, evidence for stabilizing selection acting on their secondary structures has been derived, and at least approximate models of their structures have been computed. The overwhelming majority of these hypo-thetical RNAs cannot be assigned to established families or classes. We present here a structure-based clustering approach that is capable of extracting putative RNA classesfrom genome-wide surveys for structured RNAs. The LocARNA tool implements a novel variant of theSankoff algorithm that is sufficiently fast to deal with several thousand candidate sequences. The method is also robust against false positive predictions, i.e., a contamination of the input data with unstructured ornon-conserved sequences. We have successfully tested the LocARNA-based clustering approach on the sequences of the RFAM-seedalignments. Furthermore, we have applied it to a previously published set of 3332 predicted structured elements in the Ciona intestinalis genomes (Missal et al., Bioinformatics 21(S2), i77-i78). In addition torecovering e.g. tRNAs as a structure-based class, the method identifies several RNA families, including microRNA and snoRNA candidates, and suggests several novel classes of ncRNAs for which to-date norepresentative has been experimentally characterized.} }