LocARNA-1.9.2
src/LocARNA/arc_matches.hh
00001 #ifndef LOCARNA_ARC_MATCHES_HH
00002 #define LOCARNA_ARC_MATCHES_HH
00003 
00004 #ifdef HAVE_CONFIG_H
00005 #include <config.h>
00006 #endif
00007 
00008 #include <algorithm>
00009 #include <vector>
00010 
00011 #include "scoring_fwd.hh"
00012 #include "aux.hh"
00013 #include "matrix.hh"
00014 #include "basepairs.hh"
00015 
00016 #include <assert.h>
00017 
00018 namespace LocARNA {
00019     class Scoring;
00020     class Sequence;
00021     class RnaData;
00022     class AnchorConstraints;
00023     class TraceController;
00024     class MatchController;
00025 
00034     class ArcMatch {
00035     public:
00036         typedef std::vector<int>::size_type size_type; 
00037         typedef size_type idx_type;                    
00038         typedef BasePairs__Arc Arc;                    
00039     private:
00040         const Arc *arcA_; 
00041         const Arc *arcB_; 
00042         idx_type idx_;    
00043 
00044     public:
00052         ArcMatch(const Arc *arcA, const Arc *arcB, idx_type idx)
00053             : arcA_(arcA), arcB_(arcB), idx_(idx) {}
00054 
00060         const Arc &
00061         arcA() const {
00062             return *arcA_;
00063         }
00064 
00070         const Arc &
00071         arcB() const {
00072             return *arcB_;
00073         }
00074 
00080         idx_type
00081         idx() const {
00082             return idx_;
00083         }
00084     };
00085 
00087     typedef std::vector<ArcMatch> ArcMatchVec;
00088 
00090     typedef std::vector<ArcMatch::idx_type> ArcMatchIdxVec;
00091 
00115     class ArcMatches {
00116     public:
00117         typedef std::vector<int>::size_type size_type; 
00118         typedef BasePairs__Arc Arc;                    
00119     protected:
00120         size_type lenA; 
00121         size_type lenB; 
00122 
00123         BasePairs *bpsA; 
00124         BasePairs *bpsB; 
00125 
00126         /* Constraints and Heuristics */
00127 
00128         size_type max_length_diff; 
00129 
00130         size_type max_diff_at_am; 
00131 
00132         const MatchController &match_controller; 
00133 
00134 
00135         const AnchorConstraints &constraints; 
00136 
00149         bool
00150         is_valid_arcmatch(const Arc &arcA, const Arc &arcB) const;
00151 
00152         /* END constraints and heuristics */
00153 
00154         bool maintain_explicit_scores; 
00155 
00156 
00157 
00159         ArcMatchVec arc_matches_vec;
00160 
00165         size_type number_of_arcmatches;
00166 
00168         std::vector<score_t> scores;
00169 
00172         Matrix<ArcMatchIdxVec> common_right_end_lists;
00173 
00176         Matrix<ArcMatchIdxVec> common_left_end_lists;
00177 
00179         ArcMatchIdxVec inner_arcmatch_idxs;
00180 
00182         void
00183         init_inner_arc_matchs();
00184 
00187         class lex_greater_left_ends {
00188             const ArcMatches &arc_matches;
00189 
00190         public:
00196             explicit lex_greater_left_ends(const ArcMatches &arc_matches_)
00197                 : arc_matches(arc_matches_) {}
00198 
00209             bool
00210             operator()(const ArcMatch::idx_type &i,
00211                        const ArcMatch::idx_type &j) const {
00212                 size_type ali = arc_matches.arcmatch(i).arcA().left();
00213                 size_type bli = arc_matches.arcmatch(i).arcB().left();
00214                 size_type alj = arc_matches.arcmatch(j).arcA().left();
00215                 size_type blj = arc_matches.arcmatch(j).arcB().left();
00216 
00217                 return (ali > alj) || (ali == alj && bli > blj);
00218             }
00219         };
00220 
00225         class tuple5 {
00226         public:
00227             typedef std::vector<int>::size_type size_type; 
00228 
00229             size_type i;   
00230             size_type j;   
00231             size_type k;   
00232             size_type l;   
00233             score_t score; 
00234 
00235 
00245             tuple5(size_type i_,
00246                    size_type j_,
00247                    size_type k_,
00248                    size_type l_,
00249                    score_t score_)
00250                 : i(i_), j(j_), k(k_), l(l_), score(score_) {}
00251         };
00252 
00253     public:
00276         ArcMatches(const Sequence &seqA_,
00277                    const Sequence &seqB_,
00278                    const std::string &arcmatch_scores_file,
00279                    int probability_scale,
00280                    size_type max_length_diff,
00281                    size_type max_diff_at_am,
00282                    const MatchController &trace_controller,
00283                    const AnchorConstraints &constraints);
00284 
00306         ArcMatches(const RnaData &rnadataA,
00307                    const RnaData &rnadataB,
00308                    double min_prob,
00309                    size_type max_length_diff,
00310                    size_type max_diff_at_am,
00311                    const MatchController &trace_controller,
00312                    const AnchorConstraints &constraints);
00313 
00315         ~ArcMatches();
00316 
00317         // for the mea probabilistic consistency transformation, support to read
00318         // and write the arcmatch scores
00319         // this allows in general to have user defined arc-match scores
00320 
00333         void
00334         read_arcmatch_scores(const std::string &arcmatch_scores_file,
00335                              int probability_scale);
00336 
00342         void
00343         write_arcmatch_scores(const std::string &arcmatch_scores_file,
00344                               const Scoring &scoring) const;
00345 
00347         const BasePairs &
00348         get_base_pairsA() const {
00349             return *bpsA;
00350         }
00351 
00353         const BasePairs &
00354         get_base_pairsB() const {
00355             return *bpsB;
00356         }
00357 
00360         bool
00361         explicit_scores() const {
00362             return maintain_explicit_scores;
00363         }
00364 
00370         void
00371         make_scores_explicit(const Scoring &scoring);
00372 
00379         score_t
00380         get_score(const ArcMatch &am) const {
00381             assert(maintain_explicit_scores);
00382             return scores[am.idx()];
00383         }
00384 
00386         size_type
00387         num_arc_matches() const {
00388             return number_of_arcmatches;
00389         }
00390 
00392         const ArcMatch &
00393         arcmatch(size_type idx) const {
00394             assert(idx < number_of_arcmatches);
00395             return arc_matches_vec[idx];
00396         }
00397 
00398         // ============================================================
00399         // Iteration over arc matches
00400         //
00401 
00403         const ArcMatchIdxVec &
00404         common_right_end_list(size_type i, size_type j) const {
00405             return common_right_end_lists(i, j);
00406         }
00407 
00409         const ArcMatchIdxVec &
00410         common_left_end_list(size_type i, size_type j) const {
00411             return common_left_end_lists(i, j);
00412         }
00413 
00414         // ============================================================
00415 
00437         void
00438         get_max_right_ends(size_type al,
00439                            size_type bl,
00440                            size_type *max_ar,
00441                            size_type *max_br,
00442                            bool no_lonely_pairs) const;
00443 
00450         void
00451         get_min_right_ends(size_type al,
00452                            size_type bl,
00453                            size_type *min_ar,
00454                            size_type *min_br) const;
00455 
00456         // ------------------------------------------------------------
00457         // inner arc matches
00458 
00463         bool
00464         exists_inner_arc_match(const ArcMatch &am) const {
00465             return inner_arcmatch_idxs[am.idx()] < num_arc_matches();
00466         }
00467 
00473         const ArcMatch &
00474         inner_arc_match(const ArcMatch &am) const {
00475             return arcmatch(inner_arcmatch_idxs[am.idx()]);
00476         }
00477 
00484         void
00485         sort_right_adjacency_lists();
00486 
00487         // ------------------------------------------------------------
00488         // iteration (in no specific order)
00489 
00491         typedef ArcMatchVec::const_iterator const_iterator;
00492 
00494         const_iterator
00495         begin() const {
00496             return arc_matches_vec.begin();
00497         }
00498 
00500         const_iterator
00501         end() const {
00502             return arc_matches_vec.begin() + number_of_arcmatches;
00503         }
00504     };
00505 
00513     class ArcMatchesIndexed : public ArcMatches {
00514     public:
00537         ArcMatchesIndexed(const Sequence &seqA_,
00538                           const Sequence &seqB_,
00539                           const std::string &arcmatch_scores_file,
00540                           int probability_scale,
00541                           size_type max_length_diff,
00542                           size_type max_diff_at_am,
00543                           const MatchController &trace_controller,
00544                           const AnchorConstraints &constraints)
00545             : ArcMatches(seqA_,
00546                          seqB_,
00547                          arcmatch_scores_file,
00548                          probability_scale,
00549                          max_length_diff,
00550                          max_diff_at_am,
00551                          trace_controller,
00552                          constraints),
00553               am_index_() {
00554             build_arcmatch_index();
00555         }
00556 
00578         ArcMatchesIndexed(const RnaData &rnadataA,
00579                           const RnaData &rnadataB,
00580                           double min_prob,
00581                           size_type max_length_diff,
00582                           size_type max_diff_at_am,
00583                           const MatchController &trace_controller,
00584                           const AnchorConstraints &constraints)
00585             : ArcMatches(rnadataA,
00586                          rnadataB,
00587                          min_prob,
00588                          max_length_diff,
00589                          max_diff_at_am,
00590                          trace_controller,
00591                          constraints),
00592               am_index_() {
00593             build_arcmatch_index();
00594         }
00595 
00596     private:
00598         typedef std::pair<size_type, size_type> idx_pair_t;
00599 
00601         typedef unordered_map<idx_pair_t,
00602                               ArcMatch::idx_type,
00603                               pair_of_size_t_hash>::type am_index_type;
00604 
00606         am_index_type am_index_;
00607 
00612         void
00613         build_arcmatch_index();
00614 
00615     public:
00624         const ArcMatch::idx_type
00625         invalid_am_index() const {
00626             // The invalid arc match index is implemented to be the
00627             // maximum valid index + 1.
00628             // This allows an efficient implementation, where we push
00629             // an invalid arc match to the end of vector arc_matches_vec.
00630             // Thus, we return size-1!
00631 
00632             return number_of_arcmatches;
00633         }
00634 
00643         const ArcMatch::idx_type
00644         am_index(const size_type &arcAIdx, const size_type &arcBIdx) const {
00645             am_index_type::const_iterator it =
00646                 am_index_.find(idx_pair_t(arcAIdx, arcBIdx));
00647             if (am_index_.end() != it) {
00648                 return it->second;
00649             } else {
00650                 return invalid_am_index();
00651             }
00652         }
00653 
00663         const ArcMatch &
00664         am_index(const Arc &arcA, const Arc &arcB) const {
00665             return arc_matches_vec[am_index(arcA.idx(), arcB.idx())];
00666         }
00667     };
00668 
00669 } // end namespace LocARNA
00670 
00671 #endif // LOCARNA_ARC_MATCHES_HH
 All Classes Files Functions Variables Typedefs Enumerations Enumerator Friends