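// [Code-hosting page residue, not part of the original source] "Code pull complete; the page will refresh automatically."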
#ifndef SEQUENCE_IO_H_
#define SEQUENCE_IO_H_
#include <string>
#include <vector>
#include <stdexcept>
#include <fstream>
#include <stdio.h>
#include <seqan/sequence.h>
#include <seqan/file.h>
#include "assert_helpers.h"
#include "pat.h"
/**
 * Read a sequence file of the given format (TFile) and string type
 * (TStr) and append every extracted sequence to vector ss.  Note that
 * SeqAn's policy for when it encounters characters not from the
 * specified alphabet is to convert them to the lexicographically
 * smallest character in the alphabet.
 *
 * @param infile     path of the file to read
 * @param ss         destination vector; new sequences are appended
 * @param baseCutoff remaining budget of bases.  It is decremented by
 *                   the length of every sequence kept; a sequence that
 *                   would exceed it is truncated to fit.  If it is
 *                   already <= 0 on entry the file is not opened.
 * @param seqCutoff  maximum number of sequences to take from this
 *                   file (-1 means no limit)
 * @param reverse    if true, each sequence is reversed in place (after
 *                   any truncation)
 *
 * Reading stops at end of file, when seqCutoff sequences have been
 * read from this file, or as soon as baseCutoff is exhausted.
 *
 * Throws the int value 1 (after printing a message to cerr) if the
 * file cannot be opened or the input buffer cannot be installed.
 */
template <typename TStr, typename TFile>
static void readSequenceFile(const std::string& infile,
                             std::vector<TStr>& ss,
                             int64_t& baseCutoff, // limit for total bases
                             int seqCutoff = -1,  // limit for sequences
                             bool reverse = false)
{
	typedef typename Value<TStr>::Type TVal;
	static char buf[256 * 1024]; // fairly large input buffer
	if(baseCutoff <= 0) return;
	FILE *in = fopen(infile.c_str(), "r");
	if(in == NULL) {
		std::cerr << "Could not open sequence file" << std::endl;
		throw 1;
	}
	// Associate large input buffer with FILE *in
	if(setvbuf(in, buf, _IOFBF, 256 * 1024) != 0) {
		std::cerr << "Could not create input buffer for sequence file" << std::endl;
		// Don't leak the stream on the error path
		fclose(in);
		throw 1;
	}
	// Read entries using SeqAn
	int cnt = 0;
	while(!feof(in)) {
		while(true) {
			ss.push_back(TStr()); // add a new empty string to the end
			// Fill the new empty string with the next sequence from
			// the file.  SeqAn allocates just enough mem for it (at
			// the expense of lots of file seeks, which can add up)
			seqan::read(in, ss.back(), TFile());
			if(seqan::empty(ss.back())) {
				// Nothing was read; discard the placeholder and let
				// the outer loop decide whether we hit end-of-file
				ss.pop_back();
				break;
			}
			// Enforce the base cutoff
			if((int64_t)length(ss.back()) > baseCutoff) {
				resize(ss.back(), baseCutoff);
				baseCutoff = 0;
			} else {
				baseCutoff -= length(ss.back());
			}
			// Reverse the newly-read sequence in-place if desired
			if(reverse) {
				size_t len = length(ss.back());
				for(size_t i = 0; i < len/2; i++) {
					TVal t = ss.back()[i];
					ss.back()[i] = ss.back()[len-i-1];
					ss.back()[len-i-1] = t;
				}
			}
#ifndef NDEBUG
			// Sanity check that all (int) values are in range
			for(size_t i = 0; i < length(ss.back()); i++) {
				assert_lt(ss.back()[i], (int)(ValueSize<TVal>::VALUE));
				assert_geq(ss.back()[i], 0);
			}
#endif
			cnt++;
			// Enforce the sequence cutoff
			if(seqCutoff != -1 && cnt >= seqCutoff) {
				fclose(in);
				return;
			}
			// Stop once the base budget is used up.  Continuing would
			// truncate every remaining record to length 0 and append
			// it to ss as an empty sequence.
			if(baseCutoff <= 0) {
				fclose(in);
				return;
			}
		}
	}
	fclose(in);
}
/**
 * Read several sequence files, in the order given, appending every
 * extracted sequence to vector ss.
 *
 * baseCutoff is a budget shared by all files: it is passed by
 * reference to each per-file read, and no further file is read once
 * it has dropped to zero or below.  seqCutoff and reverse are
 * forwarded unchanged to every per-file read.
 */
template <typename TStr, typename TFile>
static void readSequenceFiles(const std::vector<std::string>& infiles,
                              std::vector<TStr>& ss,
                              int64_t& baseCutoff,
                              int seqCutoff = -1,
                              bool reverse = false)
{
	typedef std::vector<std::string>::const_iterator TNameIter;
	for(TNameIter name = infiles.begin(); name != infiles.end(); ++name) {
		// Base budget exhausted; skip the remaining files
		if(baseCutoff <= 0) return;
		readSequenceFile<TStr,TFile>(*name, ss, baseCutoff, seqCutoff, reverse);
	}
}
/**
 * Convenience overload of readSequenceFiles for callers that do not
 * track a base budget.  A local budget of 0xffffffff (4294967295)
 * bases is used, so reading still stops if that many bases are
 * consumed.
 */
template <typename TStr, typename TFile>
static void readSequenceFiles(const std::vector<std::string>& infiles,
                              std::vector<TStr>& ss,
                              int seqCutoff = -1,
                              bool reverse = false)
{
	// Must be an lvalue: the budget is taken by non-const reference
	int64_t defaultBaseBudget = 0xffffffffll;
	readSequenceFiles<TStr,TFile>(infiles, ss, defaultBaseBudget, seqCutoff, reverse);
}
/**
 * Parse a comma-delimited list of sequences into a vector of type T.
 *
 * Tokens are the maximal runs of non-comma characters in s, so empty
 * fields (leading, trailing or doubled commas) produce no entry.  Each
 * token is converted with T(const char*) and appended to ss.
 *
 * @param s          comma-delimited input
 * @param ss         destination vector; new sequences are appended
 * @param baseCutoff remaining budget of characters.  It is decremented
 *                   by the length of each token kept; a token that
 *                   reaches or exceeds it is truncated to fit and the
 *                   budget becomes 0, which ends parsing.
 * @param seqCutoff  parsing stops once ss.size() (including any
 *                   elements already present) reaches this value;
 *                   -1 means no limit
 * @param reverse    if true, each token is reversed (after any
 *                   truncation) before being stored
 */
template <typename T>
void readSequenceString(const std::string& s,
                        std::vector<T>& ss,
                        int64_t& baseCutoff,
                        int seqCutoff = -1,
                        bool reverse = false)
{
	// Start of the current token, or npos when no token remains
	std::string::size_type tokBegin = s.find_first_not_of(",", 0);
	while(baseCutoff > 0 && tokBegin != std::string::npos) {
		// One past the end of the token; npos if it runs to the end
		// of s (substr clamps the resulting count)
		const std::string::size_type tokEnd = s.find_first_of(",", tokBegin);
		std::string tok = s.substr(tokBegin, tokEnd - tokBegin);
		// Enforce the base cutoff
		if(static_cast<int64_t>(tok.length()) < baseCutoff) {
			baseCutoff -= static_cast<int64_t>(tok.length());
		} else {
			tok.resize(static_cast<std::string::size_type>(baseCutoff));
			baseCutoff = 0;
		}
		if(reverse) {
			tok = std::string(tok.rbegin(), tok.rend());
		}
		ss.push_back(T(tok.c_str()));
		// Enforce the sequence cutoff
		if(seqCutoff != -1 && ss.size() >= static_cast<size_t>(seqCutoff)) {
			return;
		}
		// Skip the delimiter(s); yields npos when tokEnd is npos
		tokBegin = s.find_first_not_of(",", tokEnd);
	}
}
/**
 * Parse a comma-delimited list of sequences into a vector of type T
 * without requiring the caller to supply a baseCutoff.  A local budget
 * of 0xffffffff (4294967295) characters is used instead.
 */
template <typename T>
void readSequenceString(const std::string& s,
                        std::vector<T>& ss,
                        int seqCutoff = -1,
                        bool reverse = false)
{
	// Must be an lvalue: the budget is taken by non-const reference
	int64_t defaultBaseBudget = 0xffffffffll;
	readSequenceString(s, ss, defaultBaseBudget, seqCutoff, reverse);
}
#endif /*SEQUENCE_IO_H_*/
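// [Code-hosting page residue, not part of the original source] "This location may contain content that is unsuitable for display, so the page does not show it. You can review and modify it yourself using the editing functions."
// [Code-hosting page residue, not part of the original source] "If you confirm the content involves no inappropriate language, pure advertising, violence, vulgar or pornographic material, infringement, piracy, false or worthless content, and nothing that violates national laws and regulations, you may click submit to appeal and we will handle it as soon as possible."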