Generated on Mon Jun 23 16:24:56 2008 for BIU-1.7.0 by doxygen 1.5.1

src/biu/RNAStructure.cc

Go to the documentation of this file.
00001 #include <biu/RNAStructure.hh>
00002 #include <stack>
00003 #include <biu/assertbiu.hh>
00004 
00005 namespace biu
00006 {
00007     const size_t RNAStructure::MIN_LOOP_LENGTH = 3;
00008     const size_t RNAStructure::INVALID_INDEX =
00009         std::numeric_limits<size_t>::max();
00010     const Alphabet RNAStructure::STRUCT_ALPH("().", 1);
00011     const Alphabet::AlphElem    RNAStructure::STRUCT_BND_OPEN = RNAStructure::STRUCT_ALPH.getElement("(");
00012     const Alphabet::AlphElem    RNAStructure::STRUCT_BND_CLOSE = RNAStructure::STRUCT_ALPH.getElement(")");
00013     const Alphabet::AlphElem    RNAStructure::STRUCT_UNBOUND = RNAStructure::STRUCT_ALPH.getElement(".");
00014 
00016 // RNAStructure
00018 
00019     RNAStructure::RNAStructure( const std::string& rnaSeqStr,
00020                                 const std::string& rnaStructBracketDotStr,
00021                                 const AllowedBasePairs* const _bPair)
00022      :  rnaSeq(NULL),
00023         seqShared(false),
00024         rnaStructBracketDot(NULL),
00025         rnaBonds(),
00026         bPair(_bPair) 
00027     {
00028         assertbiu(bPair != NULL, "bPair is not allowed to be NULL.");
00029         assertbiu(bPair->getAlphabet()->isAlphabetString(rnaSeqStr),
00030          "RNA sequence contains characters which are not in the alphabet.");
00031         assertbiu(STRUCT_ALPH.isAlphabetString(rnaStructBracketDotStr),
00032          "RNA structure contains characters which are not in the alphabet.");
00033 
00034         
00035         rnaSeq = new Sequence(bPair->getAlphabet()->getSequence(rnaSeqStr));
00036         rnaStructBracketDot = new Structure(
00037          STRUCT_ALPH.getSequence(rnaStructBracketDotStr));
00038 
00039         assertbiu(rnaSeq->size() == rnaStructBracketDot->size(),
00040          "RNA sequence and structure have to have the same length.");
00041     
00042           // init bond data structure
00043         initBonds();
00044     }
00045 
00046     RNAStructure::RNAStructure( Sequence* rnaSeq_,
00047                                 const Structure* const rnaStructBracketDot_,
00048                                 const AllowedBasePairs* const bPair_,
00049                                 const bool seqIsShared)
00050      :  rnaSeq(NULL),
00051         seqShared(seqIsShared),
00052         rnaStructBracketDot(NULL),
00053         bPair(bPair_) 
00054     {
00055         assertbiu(rnaSeq_ != NULL, "no RNA sequence given.");
00056         assertbiu(rnaStructBracketDot_ != NULL, "no RNA structure given.");
00057         assertbiu(bPair != NULL, "bPair is not allowed to be NULL.");
00058         assertbiu(bPair->getAlphabet()->isAlphabetSequence(*rnaSeq_),
00059          "RNA sequence contains characters which are not in the alphabet.");
00060         assertbiu(STRUCT_ALPH.isAlphabetSequence(*rnaStructBracketDot_),
00061          "RNA structure contains characters which are not in the alphabet.");
00062         assertbiu(rnaSeq_->size() == rnaStructBracketDot_->size(),
00063          "RNA sequence and structure have to have the same length.");
00064 
00065         
00066           // init sequence
00067         if (seqShared) {
00068             rnaSeq = rnaSeq_;
00069         } else {
00070             rnaSeq = new Sequence(*rnaSeq_);
00071         }
00072           // init structure
00073         rnaStructBracketDot = new Structure(*rnaStructBracketDot_);
00074 
00075           // init bond data structure
00076         initBonds();
00077     }
00078 
00079     RNAStructure::RNAStructure(const RNAStructure& rnaStruct)
00080      :  rnaSeq(rnaStruct.rnaSeq),
00081         seqShared(rnaStruct.seqShared),
00082         rnaStructBracketDot(new Structure(*(rnaStruct.rnaStructBracketDot))),
00083         rnaBonds(rnaStruct.rnaBonds), bPair(rnaStruct.bPair)
00084     {
00085         assertbiu(bPair != NULL, "bPair is not allowed to be NULL.");
00086         assertbiu(rnaSeq->size() == rnaStructBracketDot->size(),
00087          "RNA sequence and structure have to have the same length.");
00088         
00089         if (!seqShared) {
00090             rnaSeq = new Sequence(*(rnaStruct.rnaSeq));
00091         }
00092     }
00093 
00094     RNAStructure::~RNAStructure() {
00095         if(!seqShared && rnaSeq != NULL) {
00096             delete rnaSeq; rnaSeq = NULL;
00097         }
00098         if (rnaStructBracketDot != NULL) {
00099             delete rnaStructBracketDot; rnaStructBracketDot = NULL;
00100         }
00101     }
00102 
00103     bool        
00104     RNAStructure::isAllowedBasePair(    size_t first,
00105                                         size_t second)  const 
00106     {
00107         if (first == RNAStructure::INVALID_INDEX || second == RNAStructure::INVALID_INDEX)
00108             return false;
00109         assertbiu(first < rnaSeq->size() && second < rnaSeq->size(),
00110          "First or second is not a base position in the RNA sequence.");
00111         return bPair->allowedBasePair((*rnaSeq)[first], (*rnaSeq)[second]);
00112     }
00113 
00114     RNAStructure&   
00115     RNAStructure::operator= (const RNAStructure& rnaStruct2) {
00116         assertbiu(  seqShared == rnaStruct2.seqShared, 
00117                     "sequence of both has to be shared or not");
00118         if (this != &rnaStruct2) {
00119               // copy seq
00120             if (seqShared) { // copy only pointer
00121                 rnaSeq = rnaStruct2.rnaSeq;
00122             } else { // copy
00123                 rnaSeq->resize(rnaStruct2.rnaSeq->size());
00124                 std::copy(  rnaStruct2.rnaSeq->begin(),
00125                             rnaStruct2.rnaSeq->end(),
00126                             rnaSeq->begin());
00127             }
00128               // copy structure
00129             rnaStructBracketDot->resize(rnaStruct2.rnaStructBracketDot->size());
00130             std::copy(  rnaStruct2.rnaStructBracketDot->begin(),
00131                         rnaStruct2.rnaStructBracketDot->end(),
00132                         rnaStructBracketDot->begin());
00133               // copy remaining data
00134             bPair = rnaStruct2.bPair;
00135             rnaBonds = rnaStruct2.rnaBonds;
00136         }
00137         return *this;
00138     }
00139 
00140     bool        
00141     RNAStructure::operator== (const RNAStructure& rnaStruct2)
00142              const {
00143         return (rnaSeq == rnaStruct2.rnaSeq || *rnaSeq == *(rnaStruct2.rnaSeq))
00144             && *rnaStructBracketDot == *(rnaStruct2.rnaStructBracketDot)
00145             && rnaBonds == rnaStruct2.rnaBonds
00146             && *bPair == *(rnaStruct2.bPair);
00147     }
00148 
00149     bool        
00150     RNAStructure::operator!= (const RNAStructure& rnaStruct2)
00151              const {
00152         return (rnaSeq != rnaStruct2.rnaSeq && *rnaSeq != *(rnaStruct2.rnaSeq))
00153             || *rnaStructBracketDot != *(rnaStruct2.rnaStructBracketDot)
00154             || rnaBonds != rnaStruct2.rnaBonds
00155             || *bPair != *(rnaStruct2.bPair);
00156     }
00157 
00158     bool        
00159     RNAStructure::hasValidStructure() const {
00160         int count = 0;  // counter for open bonds
00161         // each opening bracket has to have a closing bracket
00162         for (size_t i=0; i < rnaStructBracketDot->size(); i++) {
00163             if ((*rnaStructBracketDot)[i] == STRUCT_BND_OPEN ) {
00164                 count++;
00165             }
00166             else if ((*rnaStructBracketDot)[i] == STRUCT_BND_CLOSE) {
00167                 count--;
00168             }
00169             if (count < 0) {
00170                 return false;
00171             }
00172         }
00173         return (count == 0);
00174     }
00175 
00176     bool        
00177     RNAStructure::hasValidBasePairs() const{
00178         for (size_t i=0; i < rnaBonds.size(); i++) {
00179             // existing bond has to be an allowed base pair
00180             if (rnaBonds[i] != RNAStructure::INVALID_INDEX
00181                 && !isAllowedBasePair(i, rnaBonds[i])) {
00182                 return false;
00183             }
00184         }
00185         return true;
00186     }   
00187 
00188     bool        
00189     RNAStructure::hasMinLoopLength() const {
00190         for (size_t i=0; i < rnaBonds.size(); i++) {
00191             // existing bond has to have at least MIN_LENGTH_LOOP
00192             if (rnaBonds[i] != RNAStructure::INVALID_INDEX
00193                 && (rnaBonds[i]-i) <= RNAStructure::MIN_LOOP_LENGTH) {
00194                 return false;
00195             }
00196         }
00197         return true;
00198     }
00199 
00200 
00201     bool        
00202     RNAStructure::isValid() const{
00203         assertbiu(rnaSeq->size() == rnaStructBracketDot->size(),
00204                  "RNA sequence and structure have to have the same length.");
00205         return (    hasValidStructure() && hasMinLoopLength()
00206                     && hasValidBasePairs() );
00207     }
00208 
00209     std::string 
00210     RNAStructure::getStringRepresentation() const {
00211         return ( getStructureString() + "(" + getSequenceString() + ")" );
00212     }
00213     
00214     void
00215     RNAStructure::initBonds() {
00216           // resize and initialise bonds vector
00217         rnaBonds = std::vector<size_t>
00218                (rnaStructBracketDot->size(), RNAStructure::INVALID_INDEX);
00219 
00220           // do initialization only if structure is valid 
00221         if (!hasValidStructure()) {
00222             return;
00223         }
00224           // std::stack openingBonds contains opening bonds to create
00225           // vector rnaBonds during intitialisation
00226         std::stack<size_t>  openingBonds;
00227         
00228           // do update for real initialisation
00229         for (size_t i=0; i <  rnaStructBracketDot->size(); i++) {
00230             if ( (*rnaStructBracketDot)[i] == STRUCT_BND_OPEN ) {
00231                 openingBonds.push(i);
00232             }
00233             else if ( (*rnaStructBracketDot)[i] == STRUCT_BND_CLOSE ) {
00234                 assertbiu(!openingBonds.empty(),
00235                  "Closing without opening bond.");
00236                 // check whether pairing of bases allowed   
00237                 rnaBonds[openingBonds.top()] = i;
00238                 openingBonds.pop();
00239             }
00240         }
00241         assertbiu(openingBonds.empty(), "Opening without closing bond.");
00242     }
00243 
00244 } // namespace biu