LocARNA-1.8.11
confusion_matrix.hh
1 #ifndef LOCARNA_CONFUSION_MATRIX
2 #define LOCARNA_CONFUSION_MATRIX
3 
4 #ifdef HAVE_CONFIG_H
5 # include <config.h>
6 #endif
7 
8 #include <cstddef>
9 #include <cassert>
10 #include <string>
11 
12 namespace LocARNA {
13  class RnaStructure;
14 
33  public:
35  typedef std::pair<size_t,size_t> bp_t;
36 
40  class BPFilter {
41  public:
42  BPFilter()
43  {};
44 
45  virtual
46  ~BPFilter()
47  {};
48 
49  virtual
50  bool
51  operator () (size_t i, size_t j) const {
52  assert(1<=i); // sequences are 1-based
53  assert(i<=j);
54  return true;
55  }
56 
57  virtual
58  bool operator () (const bp_t &bp) const {
59  return (*this)(bp.first,bp.second);
60  }
61  };
62 
66  class BPMinLoopSizeFilter : public BPFilter {
67  size_t mls_;
68  public:
69 
73  explicit
74  BPMinLoopSizeFilter(size_t mls)
75  : BPFilter(),
76  mls_(mls)
77  {}
78 
82 
88  bool
89  operator () (size_t i, size_t j) const {
90  assert(i>=1);
91  return i+mls_<j;
92  }
93  };
94 
98  class SpanRangeBPFilter: public BPFilter {
99  size_t lo_;
100  size_t up_;
101  public:
108  SpanRangeBPFilter(size_t lo, size_t up)
109  : BPFilter(), lo_(lo), up_(up)
110  {
111  assert(1<=lo);
112  }
113 
117 
123  bool
124  operator () (size_t i, size_t j) const {
125  assert(i<=j);
126  return lo_<=(j-i) && (up_==0 || (j-i)<=up_);
127  }
128  };
129 
133  class CanonicalBPFilter: public BPFilter {
134  const std::string &sequence_;
135  public:
136 
140  explicit
141  CanonicalBPFilter(const std::string &sequence)
142  : BPFilter(),
143  sequence_(sequence)
144  {}
145 
149 
156  bool
157  operator () (size_t i, size_t j) const {
158  return BPFilter::operator ()(i,j) && canonical(sequence_[i],sequence_[j]);
159  }
160  private:
161  static
162  bool
163  canonical(char x, char y) {
164  static std::string cpairs="AUCGGUUAGCUG";
165  for(size_t i=0; i<cpairs.length(); i+=2) {
166  if (x==cpairs[i] && y==cpairs[i+1]) return true;
167  }
168  return false;
169  }
170  };
171 
172  class CombinedBPFilter: public BPFilter {
173  const BPFilter &fa_;
174  const BPFilter &fb_;
175  public:
176  CombinedBPFilter(const BPFilter &fa, const BPFilter &fb):
177  BPFilter(),fa_(fa),fb_(fb)
178  {}
179 
181  {}
182 
183  bool
184  operator () (size_t i, size_t j) const {
185  return fa_(i,j) && fb_(i,j);
186  }
187  };
188 
189 
199  ConfusionMatrix(const std::string &ref,
200  const std::string &pred,
201  const bool slide,
202  const bool conflict,
203  const BPFilter &filter=BPMinLoopSizeFilter(3)
204  );
205 
215  ConfusionMatrix(const RnaStructure &ref,
216  const RnaStructure &pred,
217  const bool slide,
218  const bool conflict,
219  const BPFilter &filter=BPMinLoopSizeFilter(3)
220  );
221 
232  size_t
233  tp() const { return tp_; }
234 
243  size_t
244  tn() const { return tn_; }
245 
254  size_t
255  fp( ) const { return fp_; }
256 
265  size_t
266  fn() const {
267  return fn_;
268  }
269 
277  double
278  ppv() const;
279 
287  double
288  sens() const;
289 
295  double
296  spec() const;
297 
305  double
306  f1_score() const;
307 
313  double
314  mcc() const;
315 
316  protected:
317 
329  size_t
330  count_common_bps(const RnaStructure &s1,
331  const RnaStructure &s2);
332 
341  size_t
342  count_tps(const RnaStructure &pred,
343  const RnaStructure &ref);
344 
356  size_t
358  const RnaStructure &s2);
/**
 * @brief Count potential base pairs
 *
 * Defined in confusion_matrix.cc.
 *
 * @param length length of the sequence
 *
 * @return number of potential base pairs for a sequence of this length
 */
size_t count_potential_base_pairs(size_t length);
367 
368 
375  size_t
376  count_base_pairs(const RnaStructure &s);
377 
378  private:
388  void
389  compute_confusion_matrix(const RnaStructure &ref,
390  const RnaStructure &pred);
391 
392  bool slide_;
393  bool conflict_;
394  const BPFilter &filter_;
395 
396  size_t tp_;
397  size_t tn_;
398  size_t fp_;
399  size_t fn_;
400  };
401 
402 } // end namespace LocARNA
403 
404 
405 #endif // LOCARNA_CONFUSION_MATRIX
std::pair< size_t, size_t > bp_t
Definition: confusion_matrix.hh:35
base pair filter to allow only canonical base pairs
Definition: confusion_matrix.hh:133
double ppv() const
Definition: confusion_matrix.cc:196
~BPMinLoopSizeFilter()
d'tor
Definition: confusion_matrix.hh:81
size_t count_tps(const RnaStructure &pred, const RnaStructure &ref)
Count true positive base pairs.
Definition: confusion_matrix.cc:37
Compare RNA secondary structure by their confusion matrix.
Definition: confusion_matrix.hh:32
BPMinLoopSizeFilter(size_t mls)
constructor
Definition: confusion_matrix.hh:74
ConfusionMatrix(const std::string &ref, const std::string &pred, const bool slide, const bool conflict, const BPFilter &filter=BPMinLoopSizeFilter(3))
Definition: confusion_matrix.cc:162
size_t count_base_pairs(const RnaStructure &s)
Count base pairs in a structure.
Definition: confusion_matrix.cc:114
~SpanRangeBPFilter()
d'tor
Definition: confusion_matrix.hh:116
double sens() const
Definition: confusion_matrix.cc:202
CanonicalBPFilter(const std::string &sequence)
constructor
Definition: confusion_matrix.hh:141
double f1_score() const
Definition: confusion_matrix.cc:214
size_t tn() const
True negatives.
Definition: confusion_matrix.hh:244
Definition: aligner.cc:17
size_t count_potential_base_pairs(size_t length)
Count potential base pairs.
Definition: confusion_matrix.cc:101
size_t count_conflicting_base_pairs(const RnaStructure &s1, const RnaStructure &s2)
Count conflicting base pairs (including common bps)
Definition: confusion_matrix.cc:77
basic class for base pair filters (no filtering)
Definition: confusion_matrix.hh:40
double mcc() const
Definition: confusion_matrix.cc:220
size_t fn() const
Definition: confusion_matrix.hh:266
base pair range filter
Definition: confusion_matrix.hh:98
loop size base pair filter
Definition: confusion_matrix.hh:66
size_t tp() const
True positives.
Definition: confusion_matrix.hh:233
Definition: confusion_matrix.hh:172
An RNA secondary structure.
Definition: rna_structure.hh:26
double spec() const
Definition: confusion_matrix.cc:208
SpanRangeBPFilter(size_t lo, size_t up)
Construct with range.
Definition: confusion_matrix.hh:108
~CanonicalBPFilter()
d'tor
Definition: confusion_matrix.hh:148
size_t count_common_bps(const RnaStructure &s1, const RnaStructure &s2)
Count common base pairs.
Definition: confusion_matrix.cc:10
size_t fp() const
False positives.
Definition: confusion_matrix.hh:255