LocARNA-1.8.11
scoring.hh
1 #ifndef LOCARNA_SCORING_HH
2 #define LOCARNA_SCORING_HH
3 
4 #ifdef HAVE_CONFIG_H
5 # include <config.h>
6 #endif
7 
8 #include <math.h>
9 #include <vector>
10 
11 #include "aux.hh"
12 
13 #include "scoring_fwd.hh"
14 #include "matrix.hh"
15 
16 #ifndef NDEBUG
17 #include "sequence.hh"
18 #endif
19 
20 namespace LocARNA {
21 
22 
23  //#define MEA_SCORING_OLD
24 
25 
26  class RibosumFreq;
27  class Ribofit;
28  class Scoring;
29  class Sequence;
30  class BasePairs;
31  class BasePairs__Arc;
32  class ArcMatches;
33  class ArcMatch;
34  class MatchProbs;
35  class RnaData;
36 
38  typedef std::vector<infty_score_t> ScoreVector;
39 
41  typedef std::vector<pf_score_t> PFScoreVector;
42 
43 
46 
49 
51  typedef Matrix<double> ProbMatrix;
52 
53 
54 
55 
65  class ScoringParams {
66  public:
74 
77 
79  const score_t indel;
80 
83 
86 
89 
96 
103  const Ribofit *ribofit;
104 
107 
113 
119 
122 
123  const double exp_probA;
124 
125  const double exp_probB;
126 
127  const double temperature;
128 
129 
131  const bool stacking;
132 
134  const bool new_stacking;
135 
137  const bool mea_scoring;
138 
141 
144 
147 
148 
157 
158  /*
159  The mea score is composed in the following way:
160 
161  params->probability_scale *
162  (
163  sum_basematches (i,j)
164  P(i~j)
165  + alpha_factor/100 * (P(i unstructured) + P(j unstructured))
166  +
167  sum_arcmatches (a,b)
168  beta_factor/100 * (P(a)+P(b)) * P(al~bl) * P(ar~br) * ribosum_arcmatch(a,b)
169  )
170  */
171 
172  public:
173 
200  ScoringParams(score_t basematch_,
201  score_t basemismatch_,
202  score_t indel_,
203  score_t indel_loop_,
204  score_t indel_opening_,
205  score_t indel_opening_loop_,
206  RibosumFreq *ribosum_,
207  Ribofit *ribofit_,
208  score_t unpaired_penalty_,
209  score_t struct_weight_,
210  score_t tau_factor_,
211  score_t exclusion_,
212  double exp_probA_,
213  double exp_probB_,
214  double temp_,
215  bool stacking_,
216  bool new_stacking_,
217  bool mea_scoring_,
218  score_t alpha_factor_,
219  score_t beta_factor_,
220  score_t gamma_factor_,
221  score_t probability_scale_
222  )
223  : basematch(basematch_),
224  basemismatch(basemismatch_),
225  indel(indel_),
226  indel_loop(indel_loop_),
227  indel_opening(indel_opening_),
228  indel_opening_loop(indel_opening_loop_),
229  ribosum(ribosum_),
230  ribofit(ribofit_),
231  unpaired_penalty(unpaired_penalty_),
232  struct_weight(struct_weight_),
233  tau_factor(tau_factor_),
234  exclusion(exclusion_),
235  exp_probA(exp_probA_),
236  exp_probB(exp_probB_),
237  temperature(temp_),
238  stacking(stacking_),
239  new_stacking(new_stacking_),
240  mea_scoring(mea_scoring_),
241  alpha_factor(alpha_factor_),
242  beta_factor(beta_factor_),
243  gamma_factor(gamma_factor_),
244  probability_scale(probability_scale_)
245  {
246  }
247  };
248 
285  class Scoring {
286  public:
287  typedef BasePairs__Arc Arc;
288 
289  private:
290  const ScoringParams *params;
291 
292  const ArcMatches *arc_matches;
293 
294  const MatchProbs *match_probs;
295 
296  const RnaData &rna_dataA;
297  const RnaData &rna_dataB;
298  const Sequence &seqA;
299  const Sequence &seqB;
300 
305  score_t lambda_;
306 
307  public:
308 
323  Scoring(const Sequence &seqA,
324  const Sequence &seqB,
325  const RnaData &rna_dataA,
326  const RnaData &rna_dataB,
327  const ArcMatches &arc_matches,
328  const MatchProbs *match_probs,
329  const ScoringParams &params,
330  bool exp_scores=false
331  );
332 
333 
344  void
345  modify_by_parameter(score_t lambda);
346 
352  void
353  apply_unpaired_penalty();
354 
360  score_t lambda() const {return lambda_;}
361 
362  private:
363  // ------------------------------
364  // tables for precomputed score contributions
365  //
366  Matrix<score_t> sigma_tab;
367 
368  std::vector<score_t> gapcost_tabA;
369  std::vector<score_t> gapcost_tabB;
370 
371  std::vector<score_t> weightsA; //!< weights of base pairs in A
372  std::vector<score_t> weightsB; //!< weights of base pairs in B
373 
374  std::vector<score_t> stack_weightsA; //!< weights of stacked base
375  //!< pairs in A
376  std::vector<score_t> stack_weightsB; //!< weights of stacked base
377  //!< pairs in B
378 
379  // ------------------------------
380  // tables for precomputed exp score contributions for partition function
381  //
382  Matrix<pf_score_t> exp_sigma_tab;
383  pf_score_t exp_indel_opening_score;
384  pf_score_t exp_indel_opening_loop_score;
385  std::vector<pf_score_t> exp_gapcost_tabA;
386  std::vector<pf_score_t> exp_gapcost_tabB;
387 
388 
389  Matrix<size_t> identity;
390 
391  void
392  precompute_sequence_identities();
393 
402  score_t round2score(double d) const {
403  return (score_t)((d<0) ? (d-0.5) : (d+0.5));
404  }
405 
415  score_t
416  sigma_(int i, int j) const;
417 
423  void
424  precompute_sigma();
425 
431  void
432  precompute_exp_sigma();
433 
435  void
436  precompute_gapcost();
437 
439  void
440  precompute_exp_gapcost();
441 
443  void
444  precompute_weights();
445 
455  void
456  precompute_weights(const RnaData &rna_data,
457  const BasePairs &bps,
458  double exp_prob,
459  std::vector<score_t> &weights,
460  std::vector<score_t> &stack_weights);
461 
471  score_t
472  probToWeight(double p, double prob_exp) const;
473 
478  double
479  ribosum_arcmatch_prob(const Arc &arcA, const Arc &arcB) const;
480 
494  score_t
495  riboX_arcmatch_score(const Arc &arcA, const Arc &arcB) const;
496 
497 
498  pf_score_t
499  boltzmann_weight(score_t s) const { return exp(s/(pf_score_t)params->temperature); }
500 
501 
503  void
504  subtract(std::vector<score_t> &v,score_t x) const;
505 
507  void
508  subtract(Matrix<score_t> &m,score_t x) const;
509 
510  public:
511  // ------------------------------------------------------------
512  // SCORE CONTRIBUTIONS
513 
514 
524  return sigma_tab(i,j);
525  }
526 
536  return exp_sigma_tab(i,j);
537  }
538 
556  score_t arcmatch(const ArcMatch &am, bool stacked=false) const;
557 
575  score_t arcmatch(const BasePairs__Arc &arcA, const BasePairs__Arc &arcB, bool stacked=false) const;
576 
577 
587  score_t
588  arcDel(const BasePairs__Arc &arc, bool gapAorB, bool stacked=false) const;
589 
597  pf_score_t exp_arcmatch(const ArcMatch &am) const {
598  return boltzmann_weight(arcmatch(am));
599  }
600 
608  score_t arcmatch_stacked(const ArcMatch &am) const {
609  return arcmatch(am, true);
610  }
611 
620  score_t gapX(size_type alignedToGap, bool gapInA) const {
621  if (gapInA)
622  return gapA(alignedToGap);
623  else
624  return gapB(alignedToGap);
625  }
626 
634  score_t gapA(size_type posA) const {
635  assert(1<=posA && posA <= seqA.length());
636 
637  return gapcost_tabA[posA];
638  }
639 
648  assert(1<=posA && posA <= seqA.length());
649  return exp_gapcost_tabA[posA];
650  }
651 
659  score_t gapB(size_type posB) const {
660  assert(1<=posB && posB <= seqB.length());
661 
662  return gapcost_tabB[posB];
663  }
664 
673  assert(1<=posB && posB <= seqB.length());
674 
675  return exp_gapcost_tabB[posB];
676  }
677 
679  score_t exclusion() const {
680  return params->exclusion;
681  }
682 
685  return params->indel_opening;
686  }
687 
689  score_t loop_indel_score(const score_t score) const {
690  return round2score(score * params->indel_loop / params->indel);
691  }
694  return params->indel_opening_loop;
695  }
696 
697 
700  return exp_indel_opening_score;
701  }
702 
705  return exp_indel_opening_loop_score;
706  }
707 
708  //
709  // ------------------------------------------------------------
710 
720  double prob_exp(size_type len) const;
721 
727  bool stacking() const {return params->stacking || params->new_stacking;}
728 
729 
737  bool
738  is_stackable_arcA(const Arc &a) const;
739 
747  bool
748  is_stackable_arcB(const Arc &a) const;
749 
757  bool
758  is_stackable_am(const ArcMatch &am) const;
759 
760  }; // end class Scoring
761 
762 } // end namespace LocARNA
763 
764 #endif // LOCARNA_SCORING_HH
represent sparsified data of RNA ensemble
Definition: rna_data.hh:42
const Ribofit * ribofit
Definition: scoring.hh:103
score_t gapA(size_type posA) const
Definition: scoring.hh:634
const score_t alpha_factor
weight for mea contribution "unstructured"
Definition: scoring.hh:140
const score_t basematch
Definition: scoring.hh:73
score_t basematch(size_type i, size_type j) const
Score of a match of bases (without structure)
Definition: scoring.hh:523
pf_score_t exp_arcmatch(const ArcMatch &am) const
Boltzmann weight of score of arc match.
Definition: scoring.hh:597
pf_score_t exp_gapB(size_type posB) const
Boltzmann weight of score of insertion.
Definition: scoring.hh:672
Family of Ribofit matrices.
Definition: ribofit.hh:25
const score_t exclusion
cost of one exclusion.
Definition: scoring.hh:121
pf_score_t exp_basematch(size_type i, size_type j) const
Boltzmann weight of score of a base match (without structure)
Definition: scoring.hh:535
Matrix< infty_score_t > ScoreMatrix
matrix of scores supporting infinity
Definition: scoring.hh:45
std::vector< pf_score_t > PFScoreVector
Vector of partition functions.
Definition: scoring.hh:41
const score_t indel_opening
cost per gap (for affine gap-cost). Use affine gap cost if non-zero.
Definition: scoring.hh:85
const score_t struct_weight
Definition: scoring.hh:112
Provides probabilities for each match.
Definition: match_probs.hh:42
const score_t indel_opening_loop
cost per gap for loops (for affine gap-cost). Use affine gap cost if non-zero.
Definition: scoring.hh:88
size_t size_type
general size type
Definition: aux.hh:94
pf_score_t exp_indel_opening_loop() const
exp of cost to begin a new indel in loops
Definition: scoring.hh:704
Matrix< double > ProbMatrix
matrix for storing probabilities
Definition: aligner_p.hh:24
score_t exclusion() const
cost of an exclusion
Definition: scoring.hh:679
ScoringParams(score_t basematch_, score_t basemismatch_, score_t indel_, score_t indel_loop_, score_t indel_opening_, score_t indel_opening_loop_, RibosumFreq *ribosum_, Ribofit *ribofit_, score_t unpaired_penalty_, score_t struct_weight_, score_t tau_factor_, score_t exclusion_, double exp_probA_, double exp_probB_, double temp_, bool stacking_, bool new_stacking_, bool mea_scoring_, score_t alpha_factor_, score_t beta_factor_, score_t gamma_factor_, score_t probability_scale_)
Definition: scoring.hh:200
const bool stacking
turn on/off stacking terms
Definition: scoring.hh:131
const bool new_stacking
turn on/off new stacking terms
Definition: scoring.hh:134
Maintains the relevant arc matches and their scores.
Definition: arc_matches.hh:112
pf_score_t exp_gapA(size_type posA) const
Boltzmann weight of score of deletion.
Definition: scoring.hh:647
score_t indel_opening() const
cost to begin a new indel
Definition: scoring.hh:684
Definition: aligner.cc:17
const score_t gamma_factor
weight for mea contribution "consensus"
Definition: scoring.hh:146
score_t arcmatch_stacked(const ArcMatch &am) const
Score of stacked arc match.
Definition: scoring.hh:608
const bool mea_scoring
turn on/off mea scoring
Definition: scoring.hh:137
double pf_score_t
type of partition functions
Definition: scoring_fwd.hh:26
const score_t indel_loop
cost per indel for loops (for linear or affine gap cost).
Definition: scoring.hh:82
score_t lambda() const
Get factor lambda for normalized alignment.
Definition: scoring.hh:360
pos_type length() const
Length of multiple alignment.
Definition: multiple_alignment.hh:624
const RibosumFreq * ribosum
Definition: scoring.hh:95
score_t gapX(size_type alignedToGap, bool gapInA) const
Definition: scoring.hh:620
const score_t basemismatch
constant cost of a base mismatch
Definition: scoring.hh:76
score_t gapB(size_type posB) const
Definition: scoring.hh:659
Parameters for scoring.
Definition: scoring.hh:65
std::vector< infty_score_t > ScoreVector
vector of scores supporting infinity
Definition: scoring.hh:35
const score_t probability_scale
Definition: scoring.hh:156
BasePairs__Arc Arc
arc
Definition: scoring.hh:287
Represents a base pair.
Definition: basepairs.hh:40
Provides methods for the scoring of alignments.
Definition: scoring.hh:285
const score_t unpaired_penalty
penalty/cost for unpaired bases matched/mismatched/gapped
Definition: scoring.hh:106
score_t loop_indel_score(const score_t score) const
multiply a score by the ratio of indel_loop/indel
Definition: scoring.hh:689
const score_t indel
cost per indel (for linear or affine gap cost).
Definition: scoring.hh:79
Describes sequence and structure ensemble of an RNA.
Definition: basepairs.hh:107
bool stacking() const
Query stacking flag.
Definition: scoring.hh:727
long int score_t
type of the locarna score as defined by the class Scoring
Definition: scoring_fwd.hh:13
Represents ribosum similarity matrices including raw frequencies.
Definition: ribosum.hh:175
const score_t beta_factor
weight for mea contribution "structure"
Definition: scoring.hh:143
Matrix< pf_score_t > PFScoreMatrix
Matrix of partition functions.
Definition: scoring.hh:48
pf_score_t exp_indel_opening() const
exp of cost to begin a new indel
Definition: scoring.hh:699
"Sequence View" of multiple alignment as array of column vectors
Definition: sequence.hh:29
const score_t tau_factor
Definition: scoring.hh:118
score_t indel_opening_loop() const
cost to begin a new indel in loops
Definition: scoring.hh:693
Represents a match of two base pairs (arc match)
Definition: arc_matches.hh:35