LocARNA-1.8.11
arc_matches.hh
1 #ifndef LOCARNA_ARC_MATCHES_HH
2 #define LOCARNA_ARC_MATCHES_HH
3 
4 #ifdef HAVE_CONFIG_H
5 # include <config.h>
6 #endif
7 
8 #include <algorithm>
9 #include <vector>
10 
11 #include "scoring_fwd.hh"
12 #include "aux.hh"
13 #include "matrix.hh"
14 #include "basepairs.hh"
15 
16 #include <assert.h>
17 
18 
19 namespace LocARNA {
// Forward declarations. In the declarations visible in this header, Scoring,
// Sequence, RnaData, AnchorConstraints and MatchController appear only as
// const references, so their full definitions are not needed here.
// TraceController is not referenced in the visible part of this header.
20  class Scoring;
21  class Sequence;
22  class RnaData;
23  class AnchorConstraints;
24  class TraceController;
25  class MatchController;
26 
// Represents a match of two base pairs (an "arc match"): one arc from each
// of the two RNAs plus the index of this arc match.
// The two arcs are held as const pointers. This class declares no
// destructor, so it never releases the objects they point to.
35  class ArcMatch {
36  public:
// size type
37  typedef std::vector<int>::size_type size_type;
// type of an arc match index
38  typedef size_type idx_type;
// type of a single arc (base pair)
39  typedef BasePairs__Arc Arc;
40  private:
// arc A of the match
41  const Arc *arcA_;
// arc B of the match
42  const Arc *arcB_;
// index of this arc match
43  idx_type idx_;
44 
45  public:
46 
// Construct from the two arcs and the arc match index.
// The arguments are stored as given; nothing is validated here. Since
// arcA() and arcB() dereference the stored pointers without a check, both
// pointers must be non-null before those accessors are used.
54  ArcMatch(const Arc *arcA,const Arc *arcB, idx_type idx)
55  : arcA_(arcA),
56  arcB_(arcB),
57  idx_(idx)
58  {}
59 
// Returns arc A by const reference (dereferences the stored pointer).
65  const Arc &
66  arcA() const {return *arcA_;}
67 
// Returns arc B by const reference (dereferences the stored pointer).
73  const Arc &
74  arcB() const {return *arcB_;}
75 
// Returns the index of this arc match, as passed to the constructor.
81  idx_type
82  idx() const {return idx_;}
83  };
84 
// Vector of arc matches.
86  typedef std::vector<ArcMatch> ArcMatchVec;
87 
// Vector of arc match indices.
89  typedef std::vector<ArcMatch::idx_type> ArcMatchIdxVec;
90 
// Maintains the relevant arc matches and their scores.
//
// NOTE(review): the listing numbers in this part of the class are not
// contiguous, i.e. lines of the original header are missing from this text.
// The member functions further down use bpsA, bpsB, maintain_explicit_scores,
// number_of_arcmatches, common_right_end_lists and common_left_end_lists,
// none of which is declared on a line shown here -- presumably their
// declarations are among the missing lines; confirm against the real header.
112  class ArcMatches {
113  public:
// size type
114  typedef std::vector<int>::size_type size_type;
// type of a single arc (base pair)
115  typedef BasePairs__Arc Arc;
116  protected:
117 
// length of sequence A
118  size_type lenA;
// length of sequence B
119  size_type lenB;
120 
123 
124  /* Constraints and Heuristics */
125 
// parameter of the max-length-difference heuristic (set via the
// constructor parameter of the same name -- its exact use is not visible here)
126  size_type max_length_diff;
127 
// parameter of the "max diff at arc matches" heuristic (exact use is not
// visible here)
128  size_type max_diff_at_am;
129 
131 
133 
134 
// Declared only; the definition is not part of this header.
// Presumably tests whether the pair (arcA, arcB) is admissible under the
// constraints and heuristics above -- TODO confirm in the implementation.
146  bool is_valid_arcmatch(const Arc &arcA,const Arc &arcB) const;
147 
148  /* END constraints and heuristics */
149 
151 
// vector of all maintained arc matches; arcmatch(idx) indexes into it
153  ArcMatchVec arc_matches_vec;
154 
160 
// vector of scores; get_score() reads scores[am.idx()], i.e. the entry with
// the same index as the arc match
162  std::vector<score_t> scores;
163 
164 
167 
168 
171 
172 
// For an arc match index i, inner_arcmatch_idxs[i] is an arc match index
// again: exists_inner_arc_match() compares it against num_arc_matches() and
// inner_arc_match() passes it to arcmatch().
174  ArcMatchIdxVec inner_arcmatch_idxs;
175 
// Declared only; presumably fills inner_arcmatch_idxs -- TODO confirm.
177  void
178  init_inner_arc_matchs();
179 
// Comparison functor lex_greater_left_ends: compares two arc match indices
// by lexicographically comparing the left ends of their arcs.
// NOTE(review): the line that opens this class (listing line 181) is missing
// from this text; the members below and the closing "};" belong to it.
//
// Reference to the ArcMatches object used to resolve indices; because it is
// stored as a reference, that object must outlive the comparator.
182  const ArcMatches &arc_matches;
183  public:
184 
// Construct the comparator for the arc matches in arc_matches_.
190  explicit
191  lex_greater_left_ends(const ArcMatches &arc_matches_)
192  : arc_matches(arc_matches_)
193  {}
194 
// Returns true iff the pair (left end of arc A, left end of arc B) of arc
// match i is lexicographically strictly greater than the same pair of arc
// match j: the left end in A decides first, the left end in B breaks ties.
// Equal pairs compare false.
205  bool
206  operator () (const ArcMatch::idx_type &i, const ArcMatch::idx_type &j) const {
207  size_type ali = arc_matches.arcmatch(i).arcA().left();
208  size_type bli = arc_matches.arcmatch(i).arcB().left();
209  size_type alj = arc_matches.arcmatch(j).arcA().left();
210  size_type blj = arc_matches.arcmatch(j).arcB().left();
211 
212  return (ali>alj) || (ali==alj && bli>blj);
213  }
214  };
215 
// Plain record of four positions i, j, k, l and a score.
// NOTE(review): the constructor initializes a member named "score", but its
// declaration (listing line 228) is missing from this text; presumably it is
// "score_t score;" -- confirm against the real header.
220  class tuple5 {
221  public:
// size type
222  typedef std::vector<int>::size_type size_type;
223 
// position i
224  size_type i;
// position j
225  size_type j;
// position k
226  size_type k;
// position l
227  size_type l;
229 
// Construct from the four positions and the score; every argument is copied
// into the member of the same name.
239  tuple5(size_type i_,size_type j_,size_type k_,size_type l_,score_t score_)
240  :i(i_), j(j_), k(k_), l(l_), score(score_)
241  {}
242  };
243 
244 
245  public:
246 
247 
// Constructor taking the two sequences and the name of an arc match score
// file. Declared only; the definition is not part of this header.
// Presumably the arc matches and their explicit scores are read from
// arcmatch_scores_file, with probability_scale handled as in
// read_arcmatch_scores() -- TODO confirm in the implementation.
// Note that the parameter named trace_controller has type MatchController.
266  ArcMatches(const Sequence &seqA_,
267  const Sequence &seqB_,
268  const std::string &arcmatch_scores_file,
269  int probability_scale,
270  size_type max_length_diff,
271  size_type max_diff_at_am,
272  const MatchController &trace_controller,
273  const AnchorConstraints &constraints);
274 
275 
276 
// Constructor taking the RNA data of both RNAs and a probability threshold
// min_prob. Declared only; the definition is not part of this header.
// Presumably arc matches are built from base pairs whose probability is at
// least min_prob -- TODO confirm in the implementation.
// Note that the parameter named trace_controller has type MatchController.
297  ArcMatches(const RnaData &rnadataA,
298  const RnaData &rnadataB,
299  double min_prob,
300  size_type max_length_diff,
301  size_type max_diff_at_am,
302  const MatchController &trace_controller,
303  const AnchorConstraints &constraints);
304 
// Destructor; declared only. It is not virtual, although ArcMatchesIndexed
// derives publicly from this class.
// NOTE(review): no copy constructor or copy assignment operator is visible in
// this text although a destructor is user-declared -- confirm that copying an
// ArcMatches object is safe or intentionally unsupported.
306  ~ArcMatches();
307 
307 
308  // Support for reading and writing arc match scores, needed for the MEA
309  // probabilistic consistency transformation. In general, this allows user-defined arc match scores.
310 
// Reads arc match scores from the file named arcmatch_scores_file.
// Declared only; the file format and the exact role of probability_scale are
// defined by the implementation, which is not visible here.
322  void read_arcmatch_scores(const std::string &arcmatch_scores_file, int probability_scale);
323 
324 
// Writes arc match scores to the file named arcmatch_scores_file; takes the
// Scoring object by const reference and does not modify this object (const
// member). Declared only; the output format is defined by the implementation.
329  void write_arcmatch_scores(const std::string &arcmatch_scores_file, const Scoring &scoring) const;
330 
331 
// Returns the base pairs object of RNA A by const reference.
// Dereferences the pointer member bpsA without a null check.
333  const BasePairs &
334  get_base_pairsA() const {
335  return *bpsA;
336  }
337 
// Returns the base pairs object of RNA B by const reference.
// Dereferences the pointer member bpsB without a null check.
339  const BasePairs &
340  get_base_pairsB() const {
341  return *bpsB;
342  }
343 
// Returns the flag maintain_explicit_scores, i.e. whether arc match scores
// are kept explicitly in the vector "scores".
345  bool
346  explicit_scores() const {
347  return maintain_explicit_scores;
348  }
349 
// Declared only. Presumably computes the score of every arc match with the
// given Scoring object and stores it in "scores", after which
// explicit_scores() holds -- TODO confirm in the implementation.
355  void
356  make_scores_explicit(const Scoring &scoring);
357 
358 
// Returns the explicitly stored score of arc match am, i.e. scores[am.idx()].
// Precondition (checked only by assert, so only when assertions are
// enabled): explicit scores are maintained. The index itself is not
// range-checked.
365  score_t
366  get_score(const ArcMatch &am) const {
367  assert(maintain_explicit_scores);
368  return scores[am.idx()];
369  }
370 
// Returns the total number of arc matches (member number_of_arcmatches).
372  size_type num_arc_matches() const {
373  return number_of_arcmatches;
374  }
375 
// Returns the arc match with index idx by const reference.
// Precondition: idx < number_of_arcmatches. This is checked only by assert,
// so with assertions disabled an out-of-range index is not detected.
377  const ArcMatch &arcmatch(size_type idx) const {
378  assert(idx<number_of_arcmatches);
379  return arc_matches_vec[idx];
380  }
381 
382  // ============================================================
383  // Iteration over arc matches
384  //
385 
// Returns the list of indices of all arc matches that share the common
// right end (i,j): the entry (i,j) of common_right_end_lists.
// No bounds are checked in this function itself.
387  const ArcMatchIdxVec &
388  common_right_end_list(size_type i, size_type j) const {
389  return common_right_end_lists(i,j);
390  }
391 
// Returns the list of indices of all arc matches that share the common
// left end (i,j): the entry (i,j) of common_left_end_lists.
// No bounds are checked in this function itself.
393  const ArcMatchIdxVec &
394  common_left_end_list(size_type i, size_type j) const {
395  return common_left_end_lists(i,j);
396  }
397 
398 
399  // ============================================================
400 
401 
// Declared only; the definition is not part of this header.
// Presumably determines, for arc matches with left ends (al,bl), the maximal
// right ends in A and B and writes them through max_ar and max_br;
// no_lonely_pairs presumably restricts the arc matches that are considered.
// TODO confirm in the implementation (including whether *max_ar / *max_br are
// also read as initial values).
419  void get_max_right_ends(size_type al,size_type bl,size_type *max_ar,size_type *max_br, bool no_lonely_pairs) const;
420 
421 
// Declared only; the definition is not part of this header.
// Presumably the counterpart of get_max_right_ends for minimal right ends,
// writing through min_ar and min_br -- TODO confirm in the implementation.
428  void get_min_right_ends(size_type al,size_type bl,size_type *min_ar,size_type *min_br) const;
429 
430  // ------------------------------------------------------------
431  // inner arc matches
432 
433 
// Returns true iff the index stored for am in inner_arcmatch_idxs is a valid
// arc match index, i.e. smaller than num_arc_matches().
// The lookup inner_arcmatch_idxs[am.idx()] is not range-checked.
438  bool
439  exists_inner_arc_match(const ArcMatch &am) const {
440  return inner_arcmatch_idxs[am.idx()] < num_arc_matches();
441  }
442 
// Returns the arc match whose index is stored for am in inner_arcmatch_idxs.
// Callers should test exists_inner_arc_match(am) first: arcmatch() only
// asserts that the index is valid.
448  const ArcMatch &
449  inner_arc_match(const ArcMatch &am) const {
450  return arcmatch(inner_arcmatch_idxs[am.idx()]);
451  }
452 
// Declared only. Presumably sorts the lists of arc matches with common right
// ends -- the ordering used is defined by the implementation; TODO confirm.
458  void
459  sort_right_adjacency_lists();
460 
461  // ------------------------------------------------------------
462  // iteration (in no specific order)
463 
// const iterator over arc matches
465  typedef ArcMatchVec::const_iterator const_iterator;
466 
// Returns an iterator to the first element of arc_matches_vec.
468  const_iterator begin() const {return arc_matches_vec.begin();}
469 
// Returns the iterator number_of_arcmatches positions past begin().
// This is deliberately not arc_matches_vec.end(): elements stored at
// positions >= number_of_arcmatches are excluded from the iteration.
471  const_iterator end() const {return arc_matches_vec.begin()+number_of_arcmatches;}
472 
473  };
474 
475 
// ArcMatches with an additional mapping from pairs of arc indices to arc
// match indices (see am_index()).
483  class ArcMatchesIndexed : public ArcMatches {
484  public:
// Constructor taking two sequences and an arc match score file.
// NOTE(review): the first line of this constructor (listing line 503) is
// missing from this text; the initializer below uses seqA_, which is
// presumably declared on it.
// All arguments are forwarded unchanged to the corresponding ArcMatches
// constructor; am_index_ is default-constructed and then filled by
// build_arcmatch_index().
504  const Sequence &seqB_,
505  const std::string &arcmatch_scores_file,
506  int probability_scale,
507  size_type max_length_diff,
508  size_type max_diff_at_am,
509  const MatchController &trace_controller,
510  const AnchorConstraints &constraints)
511  :ArcMatches(seqA_,seqB_,
512  arcmatch_scores_file,
513  probability_scale,
514  max_length_diff,
515  max_diff_at_am,
516  trace_controller,
517  constraints),
518  am_index_()
519  {
520  build_arcmatch_index();
521  }
522 
// Constructor taking the RNA data of both RNAs and the probability
// threshold min_prob.
// All arguments are forwarded unchanged to the corresponding ArcMatches
// constructor; am_index_ is default-constructed and then filled by
// build_arcmatch_index().
543  ArcMatchesIndexed(const RnaData &rnadataA,
544  const RnaData &rnadataB,
545  double min_prob,
546  size_type max_length_diff,
547  size_type max_diff_at_am,
548  const MatchController &trace_controller,
549  const AnchorConstraints &constraints)
550  :ArcMatches(rnadataA,
551  rnadataB,
552  min_prob,
553  max_length_diff,
554  max_diff_at_am,
555  trace_controller,
556  constraints),
557  am_index_()
558  {
559  build_arcmatch_index();
560  }
561 
562  private:
// key type of the index: a pair of indices; am_index() builds it as
// (index of arc A, index of arc B)
564  typedef std::pair<size_type,size_type> idx_pair_t;
565 
568 
// The index itself. NOTE(review): the typedef of am_index_type is missing
// from this text. am_index() uses it as an associative container that is
// searched with an idx_pair_t key and whose entries expose the arc match
// index as "second".
570  am_index_type am_index_;
571 
// Declared only; called by both constructors right after am_index_ was
// default-constructed. Presumably enters every arc match into am_index_ --
// TODO confirm in the implementation.
576  void
577  build_arcmatch_index();
578 
579 
580  public:
581 
// Body of invalid_am_index(): returns the index value that denotes
// "no such arc match", namely number_of_arcmatches.
// NOTE(review): the signature line of this function (around listing line 590)
// is missing from this text.
591  // The invalid arc match index is implemented to be the
592  // maximum valid index + 1.
593  // This allows an efficient implementation, where we push
594  // an invalid arc match to the end of vector arc_matches_vec.
595  // Thus, the value returned is presumably arc_matches_vec.size()-1 (to be confirmed).
596 
597  return number_of_arcmatches;
598  }
599 
// Looks up an arc match index by a pair of arc indices.
// arcAIdx / arcBIdx: indices of the arcs in A and B.
// Returns the index stored in am_index_ under the key (arcAIdx, arcBIdx),
// or invalid_am_index() if there is no entry for this key.
// (The const on the by-value return type has no effect for callers.)
608  const ArcMatch::idx_type
609  am_index(const size_type &arcAIdx,const size_type &arcBIdx) const {
610  am_index_type::const_iterator it = am_index_.find(idx_pair_t(arcAIdx,arcBIdx));
611  if (am_index_.end() != it) {
612  return it->second;
613  } else {
614  return invalid_am_index();
615  }
616  }
617 
// Looks up an arc match by a pair of arcs.
// Resolves the index with am_index(arcA.idx(), arcB.idx()) and returns the
// element of arc_matches_vec at that position. arc_matches_vec is indexed
// directly here, not through arcmatch(), so no assertion is applied.
// If the pair has no entry, the position used is invalid_am_index(); per the
// comment in invalid_am_index() an invalid arc match is expected to be
// stored there -- NOTE(review): confirm that this sentinel element always
// exists, otherwise this access is out of range.
626  const ArcMatch &
627  am_index(const Arc &arcA,const Arc &arcB) const {
628  return arc_matches_vec[am_index(arcA.idx(),arcB.idx())];
629  }
630  };
631 
632 } // end namespace LocARNA
633 
634 #endif // LOCARNA_ARC_MATCHES_HH
represent sparsified data of RNA ensemble
Definition: rna_data.hh:42
Represents anchor constraints between two sequences.
Definition: anchor_constraints.hh:26
size_t left() const
Definition: basepairs.hh:72
size_type idx_type
arc match index
Definition: arc_matches.hh:38
const ArcMatch & am_index(const Arc &arcA, const Arc &arcB) const
Lookup arc match by pair of arcs.
Definition: arc_matches.hh:627
const MatchController & match_controller
allowed alignment traces by max-diff heuristics
Definition: arc_matches.hh:130
std::vector< int >::size_type size_type
size type
Definition: arc_matches.hh:37
const ArcMatch & inner_arc_match(const ArcMatch &am) const
Definition: arc_matches.hh:449
Matrix< ArcMatchIdxVec > common_right_end_lists
for each (i,j), maintain a vector of the indices of the arc matches that share the common right end (i,j)
Definition: arc_matches.hh:166
bool explicit_scores() const
true, if arc match scores are explicit (because they are read in from a list)
Definition: arc_matches.hh:346
ArcMatch(const Arc *arcA, const Arc *arcB, idx_type idx)
Definition: arc_matches.hh:54
size_type k
position k
Definition: arc_matches.hh:226
std::vector< score_t > scores
vector of scores (of arc matches with the same index)
Definition: arc_matches.hh:162
idx_type idx() const
Definition: arc_matches.hh:82
Definition: arc_matches.hh:220
std::vector< ArcMatch > ArcMatchVec
Vector of arc matches.
Definition: arc_matches.hh:86
ArcMatchesIndexed(const Sequence &seqA_, const Sequence &seqB_, const std::string &arcmatch_scores_file, int probability_scale, size_type max_length_diff, size_type max_diff_at_am, const MatchController &trace_controller, const AnchorConstraints &constraints)
construct with explicit arc match score list
Definition: arc_matches.hh:503
const Arc & arcA() const
Definition: arc_matches.hh:66
BasePairs__Arc Arc
arc
Definition: arc_matches.hh:115
const ArcMatchIdxVec & common_left_end_list(size_type i, size_type j) const
list of all arc matches that share the common left end (i,j)
Definition: arc_matches.hh:394
class ArcMatches with additional mapping
Definition: arc_matches.hh:483
size_type number_of_arcmatches
Definition: arc_matches.hh:159
std::vector< int >::size_type size_type
size type
Definition: arc_matches.hh:222
size_type j
position j
Definition: arc_matches.hh:225
Maintains the relevant arc matches and their scores.
Definition: arc_matches.hh:112
std::vector< ArcMatch::idx_type > ArcMatchIdxVec
Vector of arc match indices.
Definition: arc_matches.hh:89
const AnchorConstraints & constraints
for constraints
Definition: arc_matches.hh:132
tuple5(size_type i_, size_type j_, size_type k_, size_type l_, score_t score_)
Definition: arc_matches.hh:239
std::vector< int >::size_type size_type
size
Definition: arc_matches.hh:114
Definition: aligner.cc:17
ArcMatchesIndexed(const RnaData &rnadataA, const RnaData &rnadataB, double min_prob, size_type max_length_diff, size_type max_diff_at_am, const MatchController &trace_controller, const AnchorConstraints &constraints)
construct from single base pair probabilities.
Definition: arc_matches.hh:543
const ArcMatchIdxVec & common_right_end_list(size_type i, size_type j) const
list of all arc matches that share the common right end (i,j)
Definition: arc_matches.hh:388
bool exists_inner_arc_match(const ArcMatch &am) const
Definition: arc_matches.hh:439
BasePairs * bpsA
base pairs of RNA A
Definition: arc_matches.hh:121
ArcMatchVec arc_matches_vec
vector of all maintained arc matches
Definition: arc_matches.hh:153
Compare two arc match indices by lexicographically comparing their left ends.
Definition: arc_matches.hh:181
score_t get_score(const ArcMatch &am) const
Definition: arc_matches.hh:366
ArcMatchVec::const_iterator const_iterator
const iterator over arc matches
Definition: arc_matches.hh:465
const Arc & arcB() const
Definition: arc_matches.hh:74
size_type lenA
length of sequence A
Definition: arc_matches.hh:118
ArcMatchIdxVec inner_arcmatch_idxs
vector of indices of inner arc matches
Definition: arc_matches.hh:174
const ArcMatch::idx_type am_index(const size_type &arcAIdx, const size_type &arcBIdx) const
Lookup arc match index by pair of arc indices.
Definition: arc_matches.hh:609
const ArcMatch::idx_type invalid_am_index() const
the invalid arc match index
Definition: arc_matches.hh:590
size_type num_arc_matches() const
total number of arc matches
Definition: arc_matches.hh:372
BasePairs * bpsB
base pairs of RNA B
Definition: arc_matches.hh:122
size_type max_length_diff
for max-diff-am heuristics
Definition: arc_matches.hh:126
size_type max_diff_at_am
for max diff at arc matches heuristics
Definition: arc_matches.hh:128
const ArcMatch & arcmatch(size_type idx) const
get arc match by its index
Definition: arc_matches.hh:377
Represents a base pair.
Definition: basepairs.hh:40
score_t score
the score (as used below: score of arc match (i,j)~(k,l))
Definition: arc_matches.hh:228
lex_greater_left_ends(const ArcMatches &arc_matches_)
Definition: arc_matches.hh:191
const BasePairs & get_base_pairsA() const
returns the base pairs object for RNA A
Definition: arc_matches.hh:334
size_type l
position l
Definition: arc_matches.hh:227
Provides methods for the scoring of alignments.
Definition: scoring.hh:285
const_iterator end() const
end of arc matches vector
Definition: arc_matches.hh:471
BasePairs__Arc Arc
arc
Definition: arc_matches.hh:39
size_t idx() const
Definition: basepairs.hh:88
abstract class that declares the method is_valid_match()
Definition: trace_controller.hh:147
Describes sequence and structure ensemble of an RNA.
Definition: basepairs.hh:107
Definition: aux.hh:51
long int score_t
type of the locarna score as defined by the class Scoring
Definition: scoring_fwd.hh:13
"Sequence View" of multiple alignment as array of column vectors
Definition: sequence.hh:29
Represents a match of two base pairs (arc match)
Definition: arc_matches.hh:35
Matrix< ArcMatchIdxVec > common_left_end_lists
for each (i,j), maintain a vector of the indices of the arc matches that share the common left end (i,j)
Definition: arc_matches.hh:170
size_type lenB
length of sequence B
Definition: arc_matches.hh:119
size_type i
position i
Definition: arc_matches.hh:224
const_iterator begin() const
begin of arc matches vector
Definition: arc_matches.hh:468
const BasePairs & get_base_pairsB() const
returns the base pairs object for RNA B
Definition: arc_matches.hh:340
bool maintain_explicit_scores
whether scores are maintained explicitly or computed from pair probabilities
Definition: arc_matches.hh:150