Generated on Tue Dec 16 12:49:16 2008 for BIU-2.2.0 by doxygen 1.5.1

src/biu/RNAStructure.cc

Go to the documentation of this file.
00001 #include <biu/RNAStructure.hh>
00002 #include <stack>
00003 #include <biu/assertbiu.hh>
00004 #include <limits>
00005 
00006 namespace biu
00007 {
00008     const size_t RNAStructure::MIN_LOOP_LENGTH = 3;
00009     const size_t RNAStructure::INVALID_INDEX =
00010         std::numeric_limits<size_t>::max();
00011     const Alphabet RNAStructure::STRUCT_ALPH("().", 1);
00012     const Alphabet::AlphElem    RNAStructure::STRUCT_BND_OPEN = RNAStructure::STRUCT_ALPH.getElement("(");
00013     const Alphabet::AlphElem    RNAStructure::STRUCT_BND_CLOSE = RNAStructure::STRUCT_ALPH.getElement(")");
00014     const Alphabet::AlphElem    RNAStructure::STRUCT_UNBOUND = RNAStructure::STRUCT_ALPH.getElement(".");
00015 
00017 // RNAStructure
00019 
00020     RNAStructure::RNAStructure( const std::string& rnaSeqStr,
00021                                 const std::string& rnaStructBracketDotStr,
00022                                 const AllowedBasePairs* const _bPair)
00023      :  rnaSeq(NULL),
00024         seqShared(false),
00025         rnaStructBracketDot(NULL),
00026         rnaBonds(),
00027         bPair(_bPair) 
00028     {
00029         assertbiu(bPair != NULL, "bPair is not allowed to be NULL.");
00030         assertbiu(bPair->getAlphabet()->isAlphabetString(rnaSeqStr),
00031          "RNA sequence contains characters which are not in the alphabet.");
00032         assertbiu(STRUCT_ALPH.isAlphabetString(rnaStructBracketDotStr),
00033          "RNA structure contains characters which are not in the alphabet.");
00034 
00035         
00036         rnaSeq = new Sequence(bPair->getAlphabet()->getSequence(rnaSeqStr));
00037         rnaStructBracketDot = new Structure(
00038          STRUCT_ALPH.getSequence(rnaStructBracketDotStr));
00039 
00040         assertbiu(rnaSeq->size() == rnaStructBracketDot->size(),
00041          "RNA sequence and structure have to have the same length.");
00042     
00043           // init bond data structure
00044         initBonds();
00045     }
00046 
00047     RNAStructure::RNAStructure( Sequence* rnaSeq_,
00048                                 const Structure* const rnaStructBracketDot_,
00049                                 const AllowedBasePairs* const bPair_,
00050                                 const bool seqIsShared)
00051      :  rnaSeq(NULL),
00052         seqShared(seqIsShared),
00053         rnaStructBracketDot(NULL),
00054         bPair(bPair_) 
00055     {
00056         assertbiu(rnaSeq_ != NULL, "no RNA sequence given.");
00057         assertbiu(rnaStructBracketDot_ != NULL, "no RNA structure given.");
00058         assertbiu(bPair != NULL, "bPair is not allowed to be NULL.");
00059         assertbiu(bPair->getAlphabet()->isAlphabetSequence(*rnaSeq_),
00060          "RNA sequence contains characters which are not in the alphabet.");
00061         assertbiu(STRUCT_ALPH.isAlphabetSequence(*rnaStructBracketDot_),
00062          "RNA structure contains characters which are not in the alphabet.");
00063         assertbiu(rnaSeq_->size() == rnaStructBracketDot_->size(),
00064          "RNA sequence and structure have to have the same length.");
00065 
00066         
00067           // init sequence
00068         if (seqShared) {
00069             rnaSeq = rnaSeq_;
00070         } else {
00071             rnaSeq = new Sequence(*rnaSeq_);
00072         }
00073           // init structure
00074         rnaStructBracketDot = new Structure(*rnaStructBracketDot_);
00075 
00076           // init bond data structure
00077         initBonds();
00078     }
00079 
00080     RNAStructure::RNAStructure(const RNAStructure& rnaStruct)
00081      :  rnaSeq(rnaStruct.rnaSeq),
00082         seqShared(rnaStruct.seqShared),
00083         rnaStructBracketDot(new Structure(*(rnaStruct.rnaStructBracketDot))),
00084         rnaBonds(rnaStruct.rnaBonds), bPair(rnaStruct.bPair)
00085     {
00086         assertbiu(bPair != NULL, "bPair is not allowed to be NULL.");
00087         assertbiu(rnaSeq->size() == rnaStructBracketDot->size(),
00088          "RNA sequence and structure have to have the same length.");
00089         
00090         if (!seqShared) {
00091             rnaSeq = new Sequence(*(rnaStruct.rnaSeq));
00092         }
00093     }
00094 
00095     RNAStructure::~RNAStructure() {
00096         if(!seqShared && rnaSeq != NULL) {
00097             delete rnaSeq; rnaSeq = NULL;
00098         }
00099         if (rnaStructBracketDot != NULL) {
00100             delete rnaStructBracketDot; rnaStructBracketDot = NULL;
00101         }
00102     }
00103 
00104     bool        
00105     RNAStructure::isAllowedBasePair(    size_t first,
00106                                         size_t second)  const 
00107     {
00108         if (first == RNAStructure::INVALID_INDEX || second == RNAStructure::INVALID_INDEX)
00109             return false;
00110         assertbiu(first < rnaSeq->size() && second < rnaSeq->size(),
00111          "First or second is not a base position in the RNA sequence.");
00112         return bPair->allowedBasePair((*rnaSeq)[first], (*rnaSeq)[second]);
00113     }
00114 
00115     RNAStructure&   
00116     RNAStructure::operator= (const RNAStructure& rnaStruct2) {
00117         assertbiu(  seqShared == rnaStruct2.seqShared, 
00118                     "sequence of both has to be shared or not");
00119         if (this != &rnaStruct2) {
00120               // copy seq
00121             if (seqShared) { // copy only pointer
00122                 rnaSeq = rnaStruct2.rnaSeq;
00123             } else { // copy
00124                 rnaSeq->resize(rnaStruct2.rnaSeq->size());
00125                 std::copy(  rnaStruct2.rnaSeq->begin(),
00126                             rnaStruct2.rnaSeq->end(),
00127                             rnaSeq->begin());
00128             }
00129               // copy structure
00130             rnaStructBracketDot->resize(rnaStruct2.rnaStructBracketDot->size());
00131             std::copy(  rnaStruct2.rnaStructBracketDot->begin(),
00132                         rnaStruct2.rnaStructBracketDot->end(),
00133                         rnaStructBracketDot->begin());
00134               // copy remaining data
00135             bPair = rnaStruct2.bPair;
00136             rnaBonds = rnaStruct2.rnaBonds;
00137         }
00138         return *this;
00139     }
00140 
00141     bool        
00142     RNAStructure::operator== (const RNAStructure& rnaStruct2)
00143              const {
00144         return (rnaSeq == rnaStruct2.rnaSeq || *rnaSeq == *(rnaStruct2.rnaSeq))
00145             && *rnaStructBracketDot == *(rnaStruct2.rnaStructBracketDot)
00146             && rnaBonds == rnaStruct2.rnaBonds
00147             && *bPair == *(rnaStruct2.bPair);
00148     }
00149 
00150     bool        
00151     RNAStructure::operator!= (const RNAStructure& rnaStruct2)
00152              const {
00153         return (rnaSeq != rnaStruct2.rnaSeq && *rnaSeq != *(rnaStruct2.rnaSeq))
00154             || *rnaStructBracketDot != *(rnaStruct2.rnaStructBracketDot)
00155             || rnaBonds != rnaStruct2.rnaBonds
00156             || *bPair != *(rnaStruct2.bPair);
00157     }
00158 
00159     bool        
00160     RNAStructure::hasValidStructure() const {
00161         int count = 0;  // counter for open bonds
00162         // each opening bracket has to have a closing bracket
00163         for (size_t i=0; i < rnaStructBracketDot->size(); i++) {
00164             if ((*rnaStructBracketDot)[i] == STRUCT_BND_OPEN ) {
00165                 count++;
00166             }
00167             else if ((*rnaStructBracketDot)[i] == STRUCT_BND_CLOSE) {
00168                 count--;
00169             }
00170             if (count < 0) {
00171                 return false;
00172             }
00173         }
00174         return (count == 0);
00175     }
00176 
00177     bool        
00178     RNAStructure::hasValidBasePairs() const{
00179         for (size_t i=0; i < rnaBonds.size(); i++) {
00180             // existing bond has to be an allowed base pair
00181             if (rnaBonds[i] != RNAStructure::INVALID_INDEX
00182                 && !isAllowedBasePair(i, rnaBonds[i])) {
00183                 return false;
00184             }
00185         }
00186         return true;
00187     }   
00188 
00189     bool        
00190     RNAStructure::hasMinLoopLength() const {
00191         for (size_t i=0; i < rnaBonds.size(); i++) {
00192             // existing bond has to have at least MIN_LENGTH_LOOP
00193             if (rnaBonds[i] != RNAStructure::INVALID_INDEX
00194                 && (rnaBonds[i]-i) <= RNAStructure::MIN_LOOP_LENGTH) {
00195                 return false;
00196             }
00197         }
00198         return true;
00199     }
00200 
00201 
00202     bool        
00203     RNAStructure::isValid() const{
00204         assertbiu(rnaSeq->size() == rnaStructBracketDot->size(),
00205                  "RNA sequence and structure have to have the same length.");
00206         return (    hasValidStructure() && hasMinLoopLength()
00207                     && hasValidBasePairs() );
00208     }
00209 
00210     std::string 
00211     RNAStructure::getStringRepresentation() const {
00212         return ( getStructureString() + "(" + getSequenceString() + ")" );
00213     }
00214     
00215     void
00216     RNAStructure::initBonds() {
00217           // resize and initialise bonds vector
00218         rnaBonds = std::vector<size_t>
00219                (rnaStructBracketDot->size(), RNAStructure::INVALID_INDEX);
00220 
00221           // do initialization only if structure is valid 
00222         if (!hasValidStructure()) {
00223             return;
00224         }
00225           // std::stack openingBonds contains opening bonds to create
00226           // vector rnaBonds during intitialisation
00227         std::stack<size_t>  openingBonds;
00228         
00229           // do update for real initialisation
00230         for (size_t i=0; i <  rnaStructBracketDot->size(); i++) {
00231             if ( (*rnaStructBracketDot)[i] == STRUCT_BND_OPEN ) {
00232                 openingBonds.push(i);
00233             }
00234             else if ( (*rnaStructBracketDot)[i] == STRUCT_BND_CLOSE ) {
00235                 assertbiu(!openingBonds.empty(),
00236                  "Closing without opening bond.");
00237                 // check whether pairing of bases allowed   
00238                 rnaBonds[openingBonds.top()] = i;
00239                 openingBonds.pop();
00240             }
00241         }
00242         assertbiu(openingBonds.empty(), "Opening without closing bond.");
00243     }
00244 
00245 } // namespace biu