% Journal article introducing the LatPack tools (HFSP Journal, 2008).
% The page range uses a double hyphen so styles typeset it as an en-dash.
% The "user" field is not a standard BibTeX field; standard styles ignore it.
@Article{Mann_LatPack_HFSP_08,
  author   = {Mann, Martin and Maticzka, Daniel and Saunders, Rhodri and Backofen, Rolf},
  title    = {Classifying protein-like sequences in arbitrary lattice protein models using {LatPack}},
  journal  = {HFSP Journal},
  year     = {2008},
  volume   = {2},
  number   = {6},
  pages    = {396--404},
  issn     = {1955-2068},
  doi      = {10.2976/1.3027681},
  note     = {Special issue on protein folding: experimental and theoretical approaches},
  user     = {mmann},
  abstract = {Knowledge of a protein's 3-dimensional native structure is vital in determining its chemical properties and functionality. However, experimental methods to determine structure are very costly and time-consuming. Computational approaches, such as folding simulations and structure prediction algorithms, are quicker and cheaper but lack consistent accuracy. This currently restricts extensive computational studies to abstract protein models. It is thus essential that simplifications induced by the models do not negate scientific value. Key to this is the use of thoroughly defined protein-like sequences. In such cases abstract models can allow for the investigation of important biological questions. Here we present a procedure to generate and classify protein-like sequence data sets. Our LatPack tools, and the approach in general, are applicable to arbitrary lattice protein models. Identification is based on thermodynamic and kinetic features. Further LatPack can incorporate the sequential assembly of proteins by addressing co-translational folding. We demonstrate the approach in the widely used, unrestricted 3D-cubic HP-model. The resulting sequence set is the first large data set for this model exhibiting the protein-like properties required. Our data and tools are freely available and can be used to investigate protein-related problems. Furthermore our data sets can serve as the first benchmark sequence sets for folding algorithms that have traditionally only been tested on random sequences.}
}