Generated on Tue Dec 16 12:49:16 2008 for BIU-2.2.0 by doxygen 1.5.1

src/biu/Alphabet.hh

Go to the documentation of this file.
00001 #ifndef ALPHABET_HH_
00002 #define ALPHABET_HH_
00003 
00004 
00005 #include <vector>
00006 #include <string>
00007 
00008 #include "biu/HashMap.hh"
00009 
00010 namespace biu
00011 {
00018     class Alphabet
00019     {
              // Alphabet: holds a set of string-valued alphabet elements and
              // declares conversions between three representations:
              //   - std::string,
              //   - Sequence  (vector of AlphElem),
              //   - CSequence (vector of CAlphElem, returned by compress()).
              // Only declarations are given in this class body; no member
              // function other than hash_string::operator() is defined here.
              // All query/conversion members are declared const.
00020     public:
              /// Type of a single alphabet element (a typedef of size_t).
00022         typedef size_t  AlphElem;
              /// Element type of a compressed sequence (one unsigned char).
00024         typedef unsigned char   CAlphElem;
00025         
              /// A sequence of alphabet elements.
00027         typedef std::vector<AlphElem> Sequence;
00028         
              /// A compressed sequence, as produced by compress()/compressS()
              /// and consumed by decompress()/decompressS().
00030         typedef std::vector<CAlphElem> CSequence;
00031         
00032     private:
00033 
00034 #ifdef HAVE_GNU_HASH_MAP
00035 
              /// Hash functor for std::string keys, used as the hasher of
              /// STR2ALPH_MAP when HAVE_GNU_HASH_MAP is defined.
00039         class hash_string {
00040         public:
00041 
                  /// Computes the hash of str: starts at 5381 and, for each
                  /// character in order, sets hash = hash * 33 + character
                  /// (the constants of the well-known djb2 string hash).
                  /// An empty string therefore hashes to 5381.
00042             size_t operator()(const std::string& str) const
00043             {
00044                 size_t hash = 5381;
00045     
00046                 for (size_t i = 0; i < str.size(); i++) {
                          // NOTE(review): str[i] is a plain char; on platforms
                          // where char is signed, values < 0 are sign-extended
                          // by the cast to size_t. The result is still
                          // deterministic within one build, but differs between
                          // signed-char and unsigned-char platforms.
00047                     hash = ((hash << 5) + hash) + (size_t)str[i]; // hash * 33 + str[i]
00048                 }
00049     
00050                 return hash;
00051             }
00052                  
00053         };
00054     
              /// Map type from element string to AlphElem: a GNU hash_map with
              /// hash_string as hasher if HAVE_GNU_HASH_MAP is defined,
              /// otherwise a std::map.
              // NOTE(review): neither <map> nor the hash_map header is included
              // directly by this file; presumably biu/HashMap.hh provides
              // them - confirm.
00055         typedef __gnu_cxx::hash_map< std::string, AlphElem, hash_string > STR2ALPH_MAP;
00056 #else
00057         typedef std::map< std::string, AlphElem > STR2ALPH_MAP;
00058 #endif
00059 
              /// Lookup table from an element's string form to its AlphElem.
00062         STR2ALPH_MAP string2alph;   
00063         
              /// Strings of the alphabet elements; presumably the reverse
              /// lookup of string2alph (index -> string) - TODO confirm
              /// against Alphabet.icc.
00065         std::vector<std::string> alph2string;   
00066         
              /// Presumably the number of characters of one alphabet element
              /// in string form - TODO confirm.
00068         size_t elementLength;
00069         
              /// Helper data for compress()/decompress(), judging by the name;
              /// its exact content is not visible in this header.
00073         std::vector<int> compressBase;
00074         
00075     public:
00076     
              /// Constructs an alphabet from one string plus an element length.
              /// @param alphabetString  presumably the concatenation of all
              ///                        element strings - TODO confirm
              /// @param elementLength   presumably the length of each element
              ///                        inside alphabetString - TODO confirm
00084         Alphabet(   const std::string& alphabetString, 
00085                     const size_t elementLength);
00086         
              /// Constructs an alphabet from a list of strings (presumably one
              /// string per alphabet element).
              // NOTE(review): this single-argument constructor is not
              // 'explicit', so a std::vector<std::string> converts implicitly
              // to an Alphabet.
00092         Alphabet(const std::vector<std::string> & alphabetStrings);
00093         
              /// Virtual destructor, so deletion through a base-class pointer
              /// reaches derived destructors.
00094         virtual ~Alphabet();
00095 
              /// Equality / inequality comparison with another alphabet; the
              /// comparison criteria are defined outside this header.
00096         bool operator== (const Alphabet& alph2) const;
00097         bool operator!= (const Alphabet& alph2) const;
00098 
              /// @return presumably the number of elements in the alphabet.
00101         size_t getAlphabetSize() const;
00102         
              /// @return presumably the value of the elementLength member.
00105         size_t getElementLength() const;
00106     
00108         // string to sequence to string
00110     
              /// Converts a string into a Sequence of alphabet elements.
00114         Sequence getSequence(const std::string& seqString) const;
00115         
              /// Converts the string form of one element into its AlphElem.
00119         AlphElem getElement(const std::string& alphElemStr) const;
00120         
              /// Converts a Sequence back into its string form.
00124         std::string getString(const Alphabet::Sequence& sequence) const;
00125         
              /// Converts a single alphabet element into its string form.
00129         std::string getString(const Alphabet::AlphElem& elem) const;
00130         
              /// Converts a Sequence into its compressed form (CSequence).
00134         CSequence compress(const Alphabet::Sequence& sequence) const;
00135         
              /// Converts a sequence given as string into its compressed form.
00139         CSequence compressS(const std::string& sequence) const;
00140         
              /// Converts a compressed sequence back into a Sequence.
              /// @param seqLength  presumably the number of elements of the
              ///                   uncompressed sequence - TODO confirm
00145         Sequence decompress(const CSequence& sequence, const size_t seqLength) const;
00146         
              /// Converts a compressed sequence back into a string.
              /// @param seqLength  presumably the number of elements of the
              ///                   uncompressed sequence - TODO confirm
00151         std::string decompressS(const CSequence& sequence, const size_t seqLength) const;
00152         
00153     
00155         // miscellaneous
00157     
              /// @return presumably true if str can be expressed with elements
              ///         of this alphabet - TODO confirm.
00163         bool isAlphabetString(const std::string& str) const;
00164         
              /// @return presumably true if every element of seq belongs to
              ///         this alphabet - TODO confirm.
00170         bool isAlphabetSequence(const Sequence& seq) const;
00171         
              /// Returns an index for the given alphabet element.
              // Note: AlphElem is itself a typedef of size_t, so argument and
              // result share the same underlying type.
00176         size_t getIndex(const AlphElem& elem) const;
00177         
              /// Returns an index for the element given in string form.
00183         size_t getIndex(const std::string& elemStr) const;
00184         
              /// Returns the alphabet element for the given index; overloads
              /// getElement(const std::string&) declared earlier in this class.
00188         AlphElem getElement(const size_t index) const;
00189         
00190     };
00191 
00192 } // namespace biu
00193 
00194   // include definitions
00195 #include "biu/Alphabet.icc"
00196 
00197 #endif /*ALPHABET_HH_*/