% Journal article (Bioinformatics, 2005) on Bayesian-network models for
% transcription factor binding sites.
% NOTE(review): `publists` and `user` are non-standard fields, presumably read
% by site-specific publication-list tooling; their values are kept verbatim.
@article{pudimat:2005:bioinformatics,
  author   = {Pudimat, Rainer and Schukat-Talamazzini, E. G. and Backofen, Rolf},
  title    = {A multiple-feature framework for modelling and predicting transcription factor binding sites},
  journal  = {Bioinformatics},
  volume   = {21},
  number   = {14},
  pages    = {3082--3088},
  year     = {2005},
  publists = {All and Rainer Pudimat and Rolf Backofen},
  abstract = {Motivation: The identification of transcription factor binding sites in promoter sequences is an important problem, since it reveals information about the transcriptional regulation of genes. For analysing transcriptional regulation, computational approaches for predicting putative binding sites are applied. Commonly used stochastic models for binding sites are position specific score matrices (PSSM), which show weak predictive power. Results: We have developed a probabilistic modelling approach which allows to consider diverse characteristic binding site properties to obtain more accurate representations of binding sites. These properties are modelled as random variables in Bayesian networks, which are capable to deal with dependencies among binding site properties. Cross validation on several data sets shows improvements in the false positive error rate and the significance (p-value) of true binding sites. Availability: A more extensive description of validation results are available at http://www.bio.inf.uni-jena.de/Software/promapper/

Contact: backofen@inf.uni-jena.de},
  user     = {rpudimat},
}