代码拉取完成,页面将自动刷新
#ifndef ALPHABETS_H_
#define ALPHABETS_H_
#include <stdexcept>
#include <string>
#include <seqan/sequence.h>
#include <seqan/file.h>
#include <sstream>
#include "assert_helpers.h"
using namespace std;
using namespace seqan;
/// Reverse a string in-place
template <typename TStr>
static inline void reverseInPlace(TStr& s) {
typedef typename Value<TStr>::Type TVal;
size_t len = length(s);
for(size_t i = 0; i < (len>>1); i++) {
TVal tmp = s[i];
s[i] = s[len-i-1];
s[len-i-1] = tmp;
}
}
/**
* Return a new TStr containing the reverse-complement of s. Ns go to
* Ns.
*/
template<typename TStr>
static inline TStr reverseComplement(const TStr& s, bool color) {
typedef typename Value<TStr>::Type TVal;
TStr s_rc;
size_t slen = length(s);
resize(s_rc, slen);
if(color) {
for(size_t i = 0; i < slen; i++) {
s_rc[i] = s[slen-i-1];
}
} else {
for(size_t i = 0; i < slen; i++) {
int sv = (int)s[slen-i-1];
if(sv == 4) {
s_rc[i] = (TVal)4;
} else {
s_rc[i] = (TVal)(sv ^ 3);
}
}
}
return s_rc;
}
/**
* Reverse-complement s in-place. Ns go to Ns.
*/
template<typename TStr>
static inline void reverseComplementInPlace(TStr& s, bool color) {
typedef typename Value<TStr>::Type TVal;
if(color) {
reverseInPlace(s);
return;
}
size_t len = length(s);
size_t i;
for(i = 0; i < (len>>1); i++) {
int sv = (int)s[len-i-1];
int sf = (int)s[i];
if(sv == 4) {
s[i] = (TVal)4;
} else {
s[i] = (TVal)(sv ^ 3);
}
if(sf == 4) {
s[len-i-1] = (TVal)4;
} else {
s[len-i-1] = (TVal)(sf ^ 3);
}
}
if((len & 1) != 0 && (int)s[len >> 1] != 4) {
s[len >> 1] = (TVal)((int)s[len >> 1] ^ 3);
}
}
/**
* Return the reverse-complement of s.
*/
template<typename TStr>
static inline TStr reverseCopy(const TStr& s) {
typedef typename Value<TStr>::Type TVal;
TStr s_rc;
size_t slen = length(s);
resize(s_rc, slen);
for(size_t i = 0; i < slen; i++) {
s_rc[i] = (TVal)((int)s[slen-i-1]);
}
return s_rc;
}
/**
* Return true iff the first string is dollar-less-than the second.
* This means that we pretend that a 'dollar sign' character,
* lexicographically larger than all other characters, exists at the
* end of both strings.
*/
template <typename TStr>
static inline bool
dollarLt(const TStr& l, const TStr& r) {
return isPrefix(r, l) || (l < r && !isPrefix(l, r));
}
/**
* Return true iff the first string is dollar-greater-than the second.
* This means that we pretend that a 'dollar sign' character,
* lexicographically larger than all other characters, exists at the
* end of both strings.
*/
template <typename TStr>
static inline bool
dollarGt(const TStr& l, const TStr& r) {
return !dollarLt(l, r);
}
/**
* Return a copy of the suffix of l starting at 'off'.
*/
template <typename TStr>
static inline std::string
suffixStr(const TStr& l, size_t off) {
typedef typename Value<TStr>::Type TVal;
std::string ret;
size_t len = seqan::length(l);
for(size_t i = off; i < len; i++) {
ret.push_back((char)(TVal)l[i]);
}
return ret;
}
/**
* Calculate the entropy of the given read. Handle Ns by charging them
* to the most frequent non-N character.
*/
static inline float entropyDna5(const String<Dna5>& read) {
size_t cs[5] = {0, 0, 0, 0, 0};
size_t readLen = seqan::length(read);
for(size_t i = 0; i < readLen; i++) {
int c = (int)read[i];
assert_lt(c, 5);
assert_geq(c, 0);
cs[c]++;
}
if(cs[4] > 0) {
// Charge the Ns to the non-N character with maximal count and
// then exclude them from the entropy calculation (i.e.,
// penalize Ns as much as possible)
if(cs[0] >= cs[1] && cs[0] >= cs[2] && cs[0] >= cs[3]) {
// Charge Ns to As
cs[0] += cs[4];
} else if(cs[1] >= cs[2] && cs[1] >= cs[3]) {
// Charge Ns to Cs
cs[1] += cs[4];
} else if(cs[2] >= cs[3]) {
// Charge Ns to Gs
cs[2] += cs[4];
} else {
// Charge Ns to Ts
cs[3] += cs[4];
}
}
float ent = 0.0;
for(int i = 0; i < 4; i++) {
if(cs[i] > 0) {
float frac = (float)cs[i] / (float)readLen;
ent += (frac * log(frac));
}
}
ent = -ent;
assert_geq(ent, 0.0);
return ent;
}
/**
* Return the DNA complement of the given ASCII char.
*/
static inline char comp(char c) {
switch(c) {
case 'a': return 't';
case 'A': return 'T';
case 'c': return 'g';
case 'C': return 'G';
case 'g': return 'c';
case 'G': return 'C';
case 't': return 'a';
case 'T': return 'A';
default: return c;
}
}
extern uint8_t dna4Cat[];
extern uint8_t charToDna5[];
/// Convert an ascii char to a 2-bit base: 0=A, 1=C, 2=G, 3=T, 4=N
extern uint8_t asc2dna[];
/// Convert an ascii char to a 2-bit base: 0=A, 1=C, 2=G, 3=T, 4=N
extern uint8_t asc2col[];
extern uint8_t rcCharToDna5[];
/// Convert an ascii char to a DNA category. Categories are:
/// 0 -> invalid
/// 1 -> unambiguous a, c, g or t
/// 2 -> ambiguous
/// 3 -> unmatchable
extern uint8_t asc2dnacat[];
/// Convert an ascii char to a color category. Categories are:
/// 0 -> invalid
/// 1 -> unambiguous 0, 1, 2 or 3
/// 2 -> ambiguous (not applicable for colors)
/// 3 -> unmatchable
extern uint8_t asc2colcat[];
/// Convert a 2-bit nucleotide (and 4=N) and a color to the
/// corresponding 2-bit nucleotide
extern uint8_t nuccol2nuc[5][5];
/// Convert ambiguous ASCII nuceleotide to mask
extern uint8_t asc2dnamask[];
/// Convert a 4-bit mask into an IUPAC code
extern char mask2iupac[16];
/**
* Return true iff c is an unambiguous Dna character.
*/
static inline bool isUnambigDna(char c) {
return asc2dnacat[(int)c] == 1;
}
/**
* Return true iff c is a Dna character.
*/
static inline bool isDna(char c) {
return asc2dnacat[(int)c] > 0;
}
/**
* Return true iff c is an unambiguous color character (0,1,2,3).
*/
static inline bool isUnambigColor(char c) {
return asc2colcat[(int)c] == 1;
}
/**
* Return true iff c is a color character.
*/
static inline bool isColor(char c) {
return asc2colcat[(int)c] > 0;
}
/// Convert bit encoded DNA char to its complement
extern int dnacomp[5];
/// String of all DNA and IUPAC characters
extern const char *iupacs;
/**
* Return the reverse complement of a bit-encoded nucleotide.
*/
static inline int compDna(int c) {
assert_leq(c, 4);
return dnacomp[c];
}
/// Convert a pair of 2-bit (and 4=N) encoded DNA bases to a color
extern uint8_t dinuc2color[5][5];
/// Map from masks to their reverse-complement masks
extern int maskcomp[16];
#endif /*ALPHABETS_H_*/
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。