LocARNA-1.9.2
src/LocARNA/sparsification_mapper.hh
00001 #ifndef SPARSIFICATION_MAPPER_HH
00002 #define SPARSIFICATION_MAPPER_HH
00003 
00004 #ifdef HAVE_CONFIG_H
00005 #include <config.h>
00006 #endif
00007 
00008 #include <iostream>
00009 #include <limits>
00010 
00011 #include "aux.hh"
00012 #include "basepairs.hh"
00013 #include "ext_rna_data.hh"
00014 
00015 // use for type safe index_t
00016 // #include "type_wrapper.hh"
00017 
00018 namespace LocARNA {
00019 
00020     // print a pair
00021     template <class T1, class T2>
00022     std::ostream &
00023     operator<<(std::ostream &out, const std::pair<T1, T2> &pair) {
00024         return out << "(" << pair.first << "," << pair.second << ") ";
00025     }
00026 
00033     class SparsificationMapper {
00034     public:
00035         typedef BasePairs__Arc Arc;            
00036         typedef size_t ArcIdx;                 
00037         typedef std::vector<ArcIdx> ArcIdxVec; 
00038         typedef pos_type matidx_t;             
00039         typedef pos_type seq_pos_t;            
00040 
00041         // note: the type safe index_t breaks current code, since
00042         // casts to size_t need to be explicite (using index_t's val()-method)
00043         // //! type-safe index type this is useful to distinguish index type
00044         // //! from other types that are defined as unsigned int
00045         // typedef type_wrapper<size_t> index_t;
00046 
00047         typedef size_t index_t; 
00048 
00051         struct info_for_pos {
00052             seq_pos_t seq_pos; 
00053             bool
00054                 unpaired; 
00055             ArcIdxVec valid_arcs; 
00056 
00057 
00059             void
00060             reset() {
00061                 this->seq_pos = 0;
00062                 this->valid_arcs.clear();
00063                 this->unpaired = false;
00064             }
00065         };
00066 
00067         typedef std::vector<info_for_pos> InfoForPosVec; 
00068 
00069 
00070 
00071 
00072 
00073 
00074     private:
00075         const BasePairs &bps;                         
00076         const ExtRnaData &rnadata;                    
00077         const double prob_unpaired_in_loop_threshold; 
00078 
00079 
00080         const double prob_basepair_in_loop_threshold; 
00081 
00082         size_type max_info_vec_size; 
00083 
00084 
00085 
00086         std::vector<InfoForPosVec> info_valid_seq_pos_vecs;
00087 
00091         std::vector<std::vector<matidx_t> > valid_mat_pos_vecs_before_eq;
00092 
00096         std::vector<std::vector<ArcIdxVec> > left_adj_vec;
00097 
00100         void
00101         compute_mapping_idx_arcs();
00102 
00105         void
00106         compute_mapping_idx_left_ends();
00107 
00112         void
00113         iterate_left_adj_list(pos_type cur_left_end,
00114                               pos_type cur_pos,
00115                               const Arc *inner_arc,
00116                               info_for_pos &struct_pos);
00117 
00118         void
00119         valid_pos_external(pos_type cur_pos,
00120                            const Arc *inner_arc,
00121                            info_for_pos &struct_pos);
00122 
00123     public:
00138         SparsificationMapper(const BasePairs &bps_,
00139                              const ExtRnaData &rnadata_,
00140                              double prob_unpaired_in_loop_threshold_,
00141                              double prob_basepair_in_loop_threshold_,
00142                              bool index_left_ends)
00143             : bps(bps_),
00144               rnadata(rnadata_),
00145               prob_unpaired_in_loop_threshold(prob_unpaired_in_loop_threshold_),
00146               prob_basepair_in_loop_threshold(prob_basepair_in_loop_threshold_),
00147               max_info_vec_size(0) {
00148             if (index_left_ends) {
00149                 compute_mapping_idx_left_ends();
00150             } else {
00151                 compute_mapping_idx_arcs();
00152             }
00153         }
00154 
00158         size_type
00159         get_max_info_vec_size() const {
00160             return max_info_vec_size;
00161         }
00162 
00169         const InfoForPosVec &
00170         valid_seq_positions(index_t idx) const {
00171             return info_valid_seq_pos_vecs.at(idx);
00172         }
00173 
00180         const ArcIdxVec &
00181         valid_arcs_right_adj(index_t idx, matidx_t pos) const {
00182             return info_valid_seq_pos_vecs.at(idx).at(pos).valid_arcs;
00183         }
00184 
00196         matidx_t
00197         first_valid_mat_pos_before_eq(
00198             index_t index,
00199             seq_pos_t pos,
00200             index_t left_end = std::numeric_limits<index_t>::max()) const {
00201             if (left_end == std::numeric_limits<index_t>::max())
00202                 left_end = index;
00203             assert(pos >= left_end); // tocheck
00204             return valid_mat_pos_vecs_before_eq.at(index).at(pos - left_end);
00205         }
00206 
00216         inline matidx_t
00217         first_valid_mat_pos_before(
00218             index_t index,
00219             seq_pos_t pos,
00220             index_t left_end = std::numeric_limits<index_t>::max()) const {
00221             // if (left_end == std::numeric_limits<index_t>::max()) assert (pos
00222             // > index);
00223             return first_valid_mat_pos_before_eq(index, pos - 1, left_end);
00224         }
00225 
00233         inline seq_pos_t
00234         get_pos_in_seq_new(index_t idx, matidx_t pos) const {
00235             assert(pos < number_of_valid_mat_pos(idx));
00236             return (
00237                 info_valid_seq_pos_vecs.at(idx).at(pos).seq_pos); //+arc.left();
00238         }
00239 
00245         size_type
00246         number_of_valid_mat_pos(index_t idx) const {
00247             return info_valid_seq_pos_vecs.at(idx).size();
00248         }
00249 
00257         bool
00258         pos_unpaired(index_t idx, matidx_t pos) const {
00259             return info_valid_seq_pos_vecs.at(idx).at(pos).unpaired;
00260         }
00261 
00274         bool
00275         matching_without_gap_possible(const Arc &arc, seq_pos_t pos) const {
00276             const pos_type &mat_pos =
00277                 first_valid_mat_pos_before(arc.idx(), pos, arc.left());
00278             return get_pos_in_seq_new(arc.idx(), mat_pos) == pos - 1;
00279         }
00280 
00288         const ArcIdxVec &
00289         valid_arcs_left_adj(const Arc &arc, seq_pos_t pos) const {
00290             return left_adj_vec.at(arc.idx()).at(pos - arc.left());
00291         }
00292 
00294         virtual ~SparsificationMapper() {}
00295 
00307         matidx_t
00308         idx_geq(index_t index,
00309                 seq_pos_t min_col,
00310                 index_t left_end = std::numeric_limits<index_t>::max()) const {
00311             if (left_end == std::numeric_limits<index_t>::max())
00312                 left_end = index;
00313 
00314             size_t num_pos = number_of_valid_mat_pos(index);
00315             seq_pos_t last_mapped_seq_pos =
00316                 get_pos_in_seq_new(index, num_pos - 1);
00317 
00318             // if the position min_col is smaller than or equal to the left end
00319             // (first mapped position), return 0
00320             if (min_col <= left_end)
00321                 return 0;
00322 
00323             // if the position min_col is greater than the last mapped sequence
00324             // position, the position does not exists
00325             // return the number of valid positions
00326             if (min_col > last_mapped_seq_pos)
00327                 return num_pos;
00328 
00329             // the matrix position after the first valid position before the
00330             // position min_col (first matrix position that
00331             // stores a sequence position that is greater than or equal to
00332             // min_col-1)
00333             matidx_t idx_geq =
00334                 first_valid_mat_pos_before_eq(index, min_col - 1, left_end) + 1;
00335 
00336             assert(get_pos_in_seq_new(index, idx_geq) >= min_col &&
00337                    !(get_pos_in_seq_new(index, idx_geq - 1) >= min_col));
00338 
00339             return idx_geq;
00340         }
00341 
00352         matidx_t
00353         idx_after_leq(
00354             index_t index,
00355             seq_pos_t max_col,
00356             index_t left_end = std::numeric_limits<index_t>::max()) const {
00357             if (left_end == std::numeric_limits<index_t>::max())
00358                 left_end = index;
00359 
00360             size_t num_pos = number_of_valid_mat_pos(index);
00361             seq_pos_t last_mapped_seq_pos =
00362                 get_pos_in_seq_new(index, num_pos - 1);
00363 
00364             // if the position max_col is smaller than the left_end (first
00365             // mapped position), return 0
00366             if (max_col < left_end)
00367                 return 0;
00368 
00369             // if the position max_col is greater than or equal to the last
00370             // mapped sequence position,
00371             // return the number of positions
00372             if (max_col >= last_mapped_seq_pos)
00373                 return num_pos;
00374 
00375             // the matrix position after the first matrix position before or
00376             // equal the sequence position max_col
00377             // (last matrix position that stores a sequence position that is
00378             // lower than or equal max_col)
00379             matidx_t idx_after_leq =
00380                 first_valid_mat_pos_before_eq(index, max_col, left_end) + 1;
00381 
00382             assert(get_pos_in_seq_new(index, idx_after_leq - 1) <= max_col &&
00383                    !(get_pos_in_seq_new(index, idx_after_leq) <= max_col));
00384 
00385             return idx_after_leq;
00386         }
00387 
00388     private:
00399         bool
00400         is_valid_arc(const Arc &inner_arc, const Arc &arc) const {
00401             assert(inner_arc.left() > arc.left() &&
00402                    inner_arc.right() < arc.right());
00403             return rnadata.arc_in_loop_prob(inner_arc.left(), inner_arc.right(),
00404                                             arc.left(), arc.right()) >=
00405                 prob_basepair_in_loop_threshold;
00406         }
00407 
00416         bool
00417         is_valid_arc_external(const Arc &inner_arc) const {
00418             return rnadata.arc_external_prob(inner_arc.left(),
00419                                              inner_arc.right()) >=
00420                 prob_basepair_in_loop_threshold;
00421         }
00422 
00423     public:
00434         bool
00435         is_valid_pos(const Arc &arc, seq_pos_t pos) const {
00436             assert(arc.left() < pos && pos < arc.right());
00437             return rnadata.unpaired_in_loop_prob(pos, arc.left(),
00438                                                  arc.right()) >=
00439                 prob_unpaired_in_loop_threshold; // todo: additional variable
00440                                                  // for external case
00441         }
00442 
00449         bool
00450         is_valid_pos_external(seq_pos_t pos) const {
00451             return rnadata.unpaired_external_prob(pos) >=
00452                 prob_unpaired_in_loop_threshold; // todo: additional variable
00453                                                  // for external case
00454         }
00455     };
00456 
00463     template <class T>
00464     std::ostream &
00465     operator<<(std::ostream &out, const std::vector<T> &vec) {
00466         for (typename std::vector<T>::const_iterator it = vec.begin();
00467              it != vec.end(); it++) {
00468             out << *it << " ";
00469         }
00470         return out;
00471     }
00472 
00480     std::ostream &
00481     operator<<(
00482         std::ostream &out,
00483         const std::vector<SparsificationMapper::InfoForPosVec> &pos_vecs_);
00484 
00492     std::ostream &
00493     operator<<(std::ostream &out,
00494                const SparsificationMapper::InfoForPosVec &pos_vec_);
00495 
00496 } // end namespace
00497 
00498 #endif //  SPARSIFICATION_MAPPER_HH
 All Classes Files Functions Variables Typedefs Enumerations Enumerator Friends