% Journal article (IJPRAI 23(2)). The citation key says 2008 while the year
% field says 2009; the key is kept as-is so existing \cite commands still resolve.
% NOTE(review): confirm 2009 is the intended publication year.
@article{pudimat:schukat:2008:ijprai,
  author   = {Pudimat, Rainer and Backofen, Rolf and Schukat-Talamazzini, Ernst-G{\"u}nter},
  title    = {Fast Feature Subset Selection in Biological Sequence Analysis},
  journal  = {International Journal of Pattern Recognition and Artificial Intelligence},
  year     = 2009,
  volume   = 23,
  number   = 2,
  pages    = {191--207},
  abstract = {Motivation: Biological research produces a wealth of
                  measured data. Neither it is easy for biologists to
                  postulate hypotheses about the behaviour or structure
                  of the observed entity because the relevant properties
                  measured are not seen in the ocean of
                  measurements. Nor it is easy to design machine
                  learning algorithms to classify or cluster the data
                  items for the same reason. Algorithms for
                  automatically selecting a highly predictive subset of
                  the measured features can help to overcome these
                  difficulties. Results: We present an efficient feature
                  selection strategy which can be applied to arbitrary
                  feature selection problems. The core technique is a
                  new method for estimating the quality of subsets from
                  previously calculated qualities for smaller subsets by
                  minimising the mean standard error of estimated values
                  with an approach common to support vector
                  machines. This method can be integrated in many
                  feature subset search algorithms. We have applied it
                  with sequential search algorithms and have been able
                  to reduce the number of quality calculations for
                  finding accurate feature subsets by about $70\%$. We
                  show these improvements by applying our approach to
                  the problem of finding highly predictive feature
                  subsets for transcription factor binding sites.},
  user     = {rpudimat}
}