1 Star 0 Fork 0

Velcon-Zheng/bowtie2

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
sstring.h 73.80 KB
一键复制 编辑 原始数据 按行查看 历史
Rone Charles 提交于 2020-01-16 10:55 . Remove whitespace
1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218221922202221222222232224222522262227222822292230223122322233223422352236223722382239224022412242224322442245224622472248224922502251225222532254225522562257225822592260226122622263226422652266226722682269227022712272227322742275227622772278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109311031113112311331143115311631173118311931203121312231233124312531263127312831293130313131323133313431353136313731383139314031413142314331443145314631473148314931503151315231533154315531563157315831593160316131623163316431653166316731683169317031713172317331743175317631773178317931803181318231833184318531863187318831893190319131923193319431953196319731983199320032013202320332043205320632073208320932103211321232133214321532163217321832193220322132223223322432253226322732283229323032313232323332343235323632373238323932403241324232433244324532463247324832493250325132523253325432553256325732583259326032613262326332643265326632673268326932703271327232733274327532763277327832793280328132823283328432853286328732883289329032913292329332943295329632973298329933003301330233033304330533063307330833093310331133123313331433153316331733183319332033213322332333243325332633273328332933303331333233333334333533363337333833393340334133423343334433453346334733483349335033513352335333543355335633573358335933603361336233633364336533663367336833693370337133723373337433753376337733783379338033813382338333843385338633873388338933903391339233933394339533963397339833993400340134023403340434053406340734083409341034113412
/*
* Copyright 2011, Ben Langmead <langmea@cs.jhu.edu>
*
* This file is part of Bowtie 2.
*
* Bowtie 2 is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Bowtie 2 is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Bowtie 2. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef SSTRING_H_
#define SSTRING_H_
#include <string.h>
#include <iostream>
#include "assert_helpers.h"
#include "alphabet.h"
#include "random_source.h"
/**
* Four kinds of strings defined here:
*
* SString:
* A fixed-length string using heap memory with size set at construction time
* or when install() member is called.
*
* S2bDnaString:
* Like SString, but stores a list uint32_t words where each word is divided
* into 16 2-bit slots interpreted as holding one A/C/G/T nucleotide each.
*
* TODO: S3bDnaString allowing N. S4bDnaString allowing nucleotide masks.
*
* SStringExpandable:
* A string using heap memory where the size of the backing store is
* automatically resized as needed. Supports operations like append, insert,
* erase, etc.
*
* SStringFixed:
* A fixed-length string using stack memory where size is set at compile
* time.
*
* All string classes have some extra facilities that make it easy to print the
* string, including when the string uses an encoded alphabet. See toZBuf()
* and toZBufXForm().
*
* Global lt, eq, and gt template functions are supplied. They are capable of
* doing lexicographical comparisons between any of the three categories of
* strings defined here.
*/
template<typename T>
class Class_sstr_len {
public:
static inline size_t sstr_len(const T& s) {
return s.length();
}
};
template<unsigned N>
class Class_sstr_len<const char[N]> {
public:
static inline size_t sstr_len(const char s[N]) {
return strlen(s);
}
};
template<>
class Class_sstr_len<const char *> {
public:
static inline size_t sstr_len(const char *s) {
return strlen(s);
}
};
template<>
class Class_sstr_len<const unsigned char *> {
public:
static inline size_t sstr_len(const unsigned char *s) {
return strlen((const char *)s);
}
};
template<typename T1, typename T2>
static inline bool sstr_eq(const T1& s1, const T2& s2) {
size_t len1 = Class_sstr_len<T1>::sstr_len(s1);
size_t len2 = Class_sstr_len<T2>::sstr_len(s2);
if(len1 != len2) return false;
for(size_t i = 0; i < len1; i++) {
if(s1[i] != s2[i]) return false;
}
return true;
}
template<typename T1, typename T2>
static inline bool sstr_neq(const T1& s1, const T2& s2) {
return !sstr_eq(s1, s2);
}
/**
* Return true iff the given suffix of s1 is equal to the given suffix of s2 up
* to upto characters.
*/
template<typename T1, typename T2>
static inline bool sstr_suf_upto_eq(
const T1& s1, size_t suf1,
const T2& s2, size_t suf2,
size_t upto,
bool endlt = true)
{
assert_leq(suf1, Class_sstr_len<T1>::sstr_len(s1));
assert_leq(suf2, Class_sstr_len<T2>::sstr_len(s2));
size_t len1 = Class_sstr_len<T1>::sstr_len(s1) - suf1;
size_t len2 = Class_sstr_len<T2>::sstr_len(s2) - suf2;
if(len1 > upto) len1 = upto;
if(len2 > upto) len2 = upto;
if(len1 != len2) return false;
for(size_t i = 0; i < len1; i++) {
if(s1[suf1+i] != s2[suf2+i]) {
return false;
}
}
return true;
}
/**
* Return true iff the given suffix of s1 is equal to the given suffix of s2 up
* to upto characters.
*/
template<typename T1, typename T2>
static inline bool sstr_suf_upto_neq(
const T1& s1, size_t suf1,
const T2& s2, size_t suf2,
size_t upto,
bool endlt = true)
{
return !sstr_suf_upto_eq(s1, suf1, s2, suf2, upto, endlt);
}
/**
* Return true iff s1 is less than s2.
*/
template<typename T1, typename T2>
static inline bool sstr_lt(const T1& s1, const T2& s2, bool endlt = true) {
size_t len1 = Class_sstr_len<T1>::sstr_len(s1);
size_t len2 = Class_sstr_len<T2>::sstr_len(s2);
size_t minlen = (len1 < len2 ? len1 : len2);
for(size_t i = 0; i < minlen; i++) {
if(s1[i] < s2[i]) {
return true;
} else if(s1[i] > s2[i]) {
return false;
}
}
if(len1 == len2) return false;
return (len1 < len2) == endlt;
}
/**
* Return true iff the given suffix of s1 is less than the given suffix of s2.
*/
template<typename T1, typename T2>
static inline bool sstr_suf_lt(
const T1& s1, size_t suf1,
const T2& s2, size_t suf2,
bool endlt = true)
{
assert_leq(suf1, Class_sstr_len<T1>::sstr_len(s1));
assert_leq(suf2, Class_sstr_len<T2>::sstr_len(s2));
size_t len1 = Class_sstr_len<T1>::sstr_len(s1) - suf1;
size_t len2 = Class_sstr_len<T2>::sstr_len(s2) - suf2;
size_t minlen = (len1 < len2 ? len1 : len2);
for(size_t i = 0; i < minlen; i++) {
if(s1[suf1+i] < s2[suf2+i]) {
return true;
} else if(s1[suf1+i] > s2[suf2+i]) {
return false;
}
}
if(len1 == len2) return false;
return (len1 < len2) == endlt;
}
/**
* Return true iff the given suffix of s1 is less than the given suffix of s2.
* Treat s1 and s2 as though they have lengths len1/len2.
*/
template<typename T1, typename T2>
static inline bool sstr_suf_lt(
const T1& s1, size_t suf1, size_t len1,
const T2& s2, size_t suf2, size_t len2,
bool endlt = true)
{
assert_leq(suf1, len1);
assert_leq(suf2, len2);
size_t left1 = len1 - suf1;
size_t left2 = len2 - suf2;
size_t minleft = (left1 < left2 ? left1 : left2);
for(size_t i = 0; i < minleft; i++) {
if(s1[suf1+i] < s2[suf2+i]) {
return true;
} else if(s1[suf1+i] > s2[suf2+i]) {
return false;
}
}
if(left1 == left2) return false;
return (left1 < left2) == endlt;
}
/**
* Return true iff the given suffix of s1 is less than the given suffix of s2
* up to upto characters.
*/
template<typename T1, typename T2>
static inline bool sstr_suf_upto_lt(
const T1& s1, size_t suf1,
const T2& s2, size_t suf2,
size_t upto,
bool endlt = true)
{
assert_leq(suf1, Class_sstr_len<T1>::sstr_len(s1));
assert_leq(suf2, Class_sstr_len<T2>::sstr_len(s2));
size_t len1 = Class_sstr_len<T1>::sstr_len(s1) - suf1;
size_t len2 = Class_sstr_len<T2>::sstr_len(s2) - suf2;
if(len1 > upto) len1 = upto;
if(len2 > upto) len2 = upto;
size_t minlen = (len1 < len2 ? len1 : len2);
for(size_t i = 0; i < minlen; i++) {
if(s1[suf1+i] < s2[suf2+i]) {
return true;
} else if(s1[suf1+i] > s2[suf2+i]) {
return false;
}
}
if(len1 == len2) return false;
return (len1 < len2) == endlt;
}
/**
* Return true iff the given prefix of s1 is less than the given prefix of s2.
*/
template<typename T1, typename T2>
static inline bool sstr_pre_lt(
const T1& s1, size_t pre1,
const T2& s2, size_t pre2,
bool endlt = true)
{
assert_leq(pre1, Class_sstr_len<T1>::sstr_len(s1));
assert_leq(pre2, Class_sstr_len<T2>::sstr_len(s2));
size_t len1 = pre1;
size_t len2 = pre2;
size_t minlen = (len1 < len2 ? len1 : len2);
for(size_t i = 0; i < minlen; i++) {
if(s1[i] < s2[i]) {
return true;
} else if(s1[i] > s2[i]) {
return false;
}
}
if(len1 == len2) return false;
return (len1 < len2) == endlt;
}
/**
* Return true iff s1 is less than or equal to s2.
*/
template<typename T1, typename T2>
static inline bool sstr_leq(const T1& s1, const T2& s2, bool endlt = true) {
size_t len1 = Class_sstr_len<T1>::sstr_len(s1);
size_t len2 = Class_sstr_len<T2>::sstr_len(s2);
size_t minlen = (len1 < len2 ? len1 : len2);
for(size_t i = 0; i < minlen; i++) {
if(s1[i] < s2[i]) {
return true;
} else if(s1[i] > s2[i]) {
return false;
}
}
if(len1 == len2) return true;
return (len1 < len2) == endlt;
}
/**
* Return true iff the given suffix of s1 is less than or equal to the given
* suffix of s2.
*/
template<typename T1, typename T2>
static inline bool sstr_suf_leq(
const T1& s1, size_t suf1,
const T2& s2, size_t suf2,
bool endlt = true)
{
assert_leq(suf1, Class_sstr_len<T1>::sstr_len(s1));
assert_leq(suf2, Class_sstr_len<T2>::sstr_len(s2));
size_t len1 = Class_sstr_len<T1>::sstr_len(s1) - suf1;
size_t len2 = Class_sstr_len<T2>::sstr_len(s2) - suf2;
size_t minlen = (len1 < len2 ? len1 : len2);
for(size_t i = 0; i < minlen; i++) {
if(s1[suf1+i] < s2[suf2+i]) {
return true;
} else if(s1[suf1+i] > s2[suf2+i]) {
return false;
}
}
if(len1 == len2) return true;
return (len1 < len2) == endlt;
}
/**
* Return true iff the given prefix of s1 is less than or equal to the given
* prefix of s2.
*/
template<typename T1, typename T2>
static inline bool sstr_pre_leq(
const T1& s1, size_t pre1,
const T2& s2, size_t pre2,
bool endlt = true)
{
assert_leq(pre1, Class_sstr_len<T1>::sstr_len(s1));
assert_leq(pre2, Class_sstr_len<T2>::sstr_len(s2));
size_t len1 = pre1;
size_t len2 = pre2;
size_t minlen = (len1 < len2 ? len1 : len2);
for(size_t i = 0; i < minlen; i++) {
if(s1[i] < s2[i]) {
return true;
} else if(s1[i] > s2[i]) {
return false;
}
}
if(len1 == len2) return true;
return (len1 < len2) == endlt;
}
/**
* Return true iff s1 is greater than s2.
*/
template<typename T1, typename T2>
static inline bool sstr_gt(const T1& s1, const T2& s2, bool endlt = true) {
size_t len1 = Class_sstr_len<T1>::sstr_len(s1);
size_t len2 = Class_sstr_len<T2>::sstr_len(s2);
size_t minlen = (len1 < len2 ? len1 : len2);
for(size_t i = 0; i < minlen; i++) {
if(s1[i] > s2[i]) {
return true;
} else if(s1[i] < s2[i]) {
return false;
}
}
if(len1 == len2) return false;
return (len1 > len2) == endlt;
}
/**
* Return true iff the given suffix of s1 is greater than the given suffix of
* s2.
*/
template<typename T1, typename T2>
static inline bool sstr_suf_gt(
const T1& s1, size_t suf1,
const T2& s2, size_t suf2,
bool endlt = true)
{
assert_leq(suf1, Class_sstr_len<T1>::sstr_len(s1));
assert_leq(suf2, Class_sstr_len<T2>::sstr_len(s2));
size_t len1 = Class_sstr_len<T1>::sstr_len(s1) - suf1;
size_t len2 = Class_sstr_len<T2>::sstr_len(s2) - suf2;
size_t minlen = (len1 < len2 ? len1 : len2);
for(size_t i = 0; i < minlen; i++) {
if(s1[suf1+i] > s2[suf2+i]) {
return true;
} else if(s1[suf1+i] < s2[suf2+i]) {
return false;
}
}
if(len1 == len2) return false;
return (len1 > len2) == endlt;
}
/**
* Return true iff the given prefix of s1 is greater than the given prefix of
* s2.
*/
template<typename T1, typename T2>
static inline bool sstr_pre_gt(
const T1& s1, size_t pre1,
const T2& s2, size_t pre2,
bool endlt = true)
{
assert_leq(pre1, Class_sstr_len<T1>::sstr_len(s1));
assert_leq(pre2, Class_sstr_len<T2>::sstr_len(s2));
size_t len1 = pre1;
size_t len2 = pre2;
size_t minlen = (len1 < len2 ? len1 : len2);
for(size_t i = 0; i < minlen; i++) {
if(s1[i] > s2[i]) {
return true;
} else if(s1[i] < s2[i]) {
return false;
}
}
if(len1 == len2) return false;
return (len1 > len2) == endlt;
}
/**
* Return true iff s1 is greater than or equal to s2.
*/
template<typename T1, typename T2>
static inline bool sstr_geq(const T1& s1, const T2& s2, bool endlt = true) {
size_t len1 = Class_sstr_len<T1>::sstr_len(s1);
size_t len2 = Class_sstr_len<T2>::sstr_len(s2);
size_t minlen = (len1 < len2 ? len1 : len2);
for(size_t i = 0; i < minlen; i++) {
if(s1[i] > s2[i]) {
return true;
} else if(s1[i] < s2[i]) {
return false;
}
}
if(len1 == len2) return true;
return (len1 > len2) == endlt;
}
/**
* Return true iff the given suffix of s1 is greater than or equal to the given
* suffix of s2.
*/
template<typename T1, typename T2>
static inline bool sstr_suf_geq(
const T1& s1, size_t suf1,
const T2& s2, size_t suf2,
bool endlt = true)
{
assert_leq(suf1, Class_sstr_len<T1>::sstr_len(s1));
assert_leq(suf2, Class_sstr_len<T2>::sstr_len(s2));
size_t len1 = Class_sstr_len<T1>::sstr_len(s1) - suf1;
size_t len2 = Class_sstr_len<T2>::sstr_len(s2) - suf2;
size_t minlen = (len1 < len2 ? len1 : len2);
for(size_t i = 0; i < minlen; i++) {
if(s1[suf1+i] > s2[suf2+i]) {
return true;
} else if(s1[suf1+i] < s2[suf2+i]) {
return false;
}
}
if(len1 == len2) return true;
return (len1 > len2) == endlt;
}
/**
* Return true iff the given prefix of s1 is greater than or equal to the given
* prefix of s2.
*/
template<typename T1, typename T2>
static inline bool sstr_pre_geq(
const T1& s1, size_t pre1,
const T2& s2, size_t pre2,
bool endlt = true)
{
assert_leq(pre1, Class_sstr_len<T1>::sstr_len(s1));
assert_leq(pre2, Class_sstr_len<T2>::sstr_len(s2));
size_t len1 = pre1;
size_t len2 = pre2;
size_t minlen = (len1 < len2 ? len1 : len2);
for(size_t i = 0; i < minlen; i++) {
if(s1[i] > s2[i]) {
return true;
} else if(s1[i] < s2[i]) {
return false;
}
}
if(len1 == len2) return true;
return (len1 > len2) == endlt;
}
template<typename T>
static inline const char * sstr_to_cstr(const T& s) {
return s.toZBuf();
}
template<>
inline const char * sstr_to_cstr<std::basic_string<char> >(
const std::basic_string<char>& s)
{
return s.c_str();
}
/**
* Simple string class with backing memory whose size is managed by the user
* using the constructor and install() member function. No behind-the-scenes
* reallocation or copying takes place.
*/
template<typename T>
class SString {
public:
explicit SString() :
cs_(NULL),
printcs_(NULL),
len_(0)
{ }
explicit SString(size_t sz) :
cs_(NULL),
printcs_(NULL),
len_(0)
{
resize(sz);
}
/**
* Create an SStringExpandable from another SStringExpandable.
*/
SString(const SString<T>& o) :
cs_(NULL),
printcs_(NULL),
len_(0)
{
*this = o;
}
/**
* Create an SStringExpandable from a std::basic_string of the
* appropriate type.
*/
explicit SString(const std::basic_string<T>& str) :
cs_(NULL),
printcs_(NULL),
len_(0)
{
install(str.c_str(), str.length());
}
/**
* Create an SStringExpandable from an array and size.
*/
explicit SString(const T* b, size_t sz) :
cs_(NULL),
printcs_(NULL),
len_(0)
{
install(b, sz);
}
/**
* Create an SStringExpandable from a zero-terminated array.
*/
explicit SString(const T* b) :
cs_(NULL),
printcs_(NULL),
len_(0)
{
install(b, strlen(b));
}
/**
* Destroy the expandable string object.
*/
virtual ~SString() {
if(cs_ != NULL) {
delete[] cs_;
cs_ = NULL;
}
if(printcs_ != NULL) {
delete[] printcs_;
printcs_ = NULL;
}
len_ = 0;
}
/**
* Assignment to other SString.
*/
SString<T>& operator=(const SString<T>& o) {
install(o.cs_, o.len_);
return *this;
}
/**
* Assignment to other SString.
*/
SString<T>& operator=(const std::basic_string<T>& o) {
install(o);
return *this;
}
/**
* Resizes the string without preserving its contents.
*/
void resize(size_t sz) {
if(cs_ != NULL) {
delete cs_;
cs_ = NULL;
}
if(printcs_ != NULL) {
delete printcs_;
printcs_ = NULL;
}
if(sz != 0) {
cs_ = new T[sz+1];
}
len_ = sz;
}
/**
* Return ith character from the left of either the forward or the
* reverse version of the read.
*/
T windowGet(
size_t i,
bool fw,
size_t depth = 0,
size_t len = 0) const
{
if(len == 0) len = len_;
assert_lt(i, len);
assert_leq(len, len_ - depth);
return fw ? cs_[depth+i] : cs_[depth+len-i-1];
}
/**
* Return ith character from the left of either the forward or the
* reverse-complement version of the read.
*/
void windowGet(
T& ret,
bool fw,
size_t depth = 0,
size_t len = 0) const
{
if(len == 0) len = len_;
assert_leq(len, len_ - depth);
ret.resize(len);
for(size_t i = 0; i < len; i++) {
ret.set(fw ? cs_[depth+i] : cs_[depth+len-i-1], i);
}
}
/**
* Set character at index 'idx' to 'c'.
*/
inline void set(int c, size_t idx) {
assert_lt(idx, len_);
cs_[idx] = c;
}
/**
* Retrieve constant version of element i.
*/
inline const T& operator[](size_t i) const {
assert_lt(i, len_);
return cs_[i];
}
/**
* Retrieve mutable version of element i.
*/
inline T& operator[](size_t i) {
assert_lt(i, len_);
return cs_[i];
}
/**
* Retrieve constant version of element i.
*/
inline const T& get(size_t i) const {
assert_lt(i, len_);
return cs_[i];
}
/**
* Copy 'sz' bytes from buffer 'b' into this string. memcpy is used, not
* operator=.
*/
virtual void install(const T* b, size_t sz) {
if(sz == 0) return;
resize(sz);
memcpy(cs_, b, sz * sizeof(T));
}
/**
* Copy 'sz' bytes from buffer 'b' into this string. memcpy is used, not
* operator=.
*/
virtual void install(const std::basic_string<T>& b) {
size_t sz = b.length();
if(sz == 0) return;
resize(sz);
memcpy(cs_, b.c_str(), sz * sizeof(T));
}
/**
* Copy all bytes from zero-terminated buffer 'b' into this string.
*/
void install(const T* b) {
install(b, strlen(b));
}
/**
* Copy 'sz' bytes from buffer 'b' into this string, reversing them
* in the process.
*/
void installReverse(const char* b, size_t sz) {
if(sz == 0) return;
resize(sz);
for(size_t i = 0; i < sz; i++) {
cs_[i] = b[sz-i-1];
}
len_ = sz;
}
/**
* Copy 'sz' bytes from buffer 'b' into this string, reversing them
* in the process.
*/
void installReverse(const SString<T>& b) {
installReverse(b.cs_, b.len_);
}
/**
* Return true iff the two strings are equal.
*/
bool operator==(const SString<T>& o) {
return sstr_eq(*this, o);
}
/**
* Return true iff the two strings are not equal.
*/
bool operator!=(const SString<T>& o) {
return sstr_neq(*this, o);
}
/**
* Return true iff this string is less than given string.
*/
bool operator<(const SString<T>& o) {
return sstr_lt(*this, o);
}
/**
* Return true iff this string is greater than given string.
*/
bool operator>(const SString<T>& o) {
return sstr_gt(*this, o);
}
/**
* Return true iff this string is less than or equal to given string.
*/
bool operator<=(const SString<T>& o) {
return sstr_leq(*this, o);
}
/**
* Return true iff this string is greater than or equal to given string.
*/
bool operator>=(const SString<T>& o) {
return sstr_geq(*this, o);
}
/**
* Reverse the buffer in place.
*/
void reverse() {
for(size_t i = 0; i < (len_ >> 1); i++) {
T tmp = get(i);
set(get(len_-i-1), i);
set(tmp, len_-i-1);
}
}
/**
* Reverse a substring of the buffer in place.
*/
void reverseWindow(size_t off, size_t len) {
assert_leq(off, len_);
assert_leq(off + len, len_);
size_t mid = len >> 1;
for(size_t i = 0; i < mid; i++) {
T tmp = get(off+i);
set(get(off+len-i-1), off+i);
set(tmp, off+len-i-1);
}
}
/**
* Set the first len elements of the buffer to el.
*/
void fill(size_t len, const T& el) {
assert_leq(len, len_);
for(size_t i = 0; i < len; i++) {
set(el, i);
}
}
/**
* Set all elements of the buffer to el.
*/
void fill(const T& el) {
fill(len_, el);
}
/**
* Return the length of the string.
*/
inline size_t length() const { return len_; }
/**
* Clear the buffer.
*/
void clear() { len_ = 0; }
/**
* Return true iff the buffer is empty.
*/
inline bool empty() const { return len_ == 0; }
/**
* Put a terminator in the 'len_'th element and then return a
* pointer to the buffer. Useful for printing.
*/
const char* toZBufXForm(const char *xform) const {
ASSERT_ONLY(size_t xformElts = strlen(xform));
// Lazily allocate space for print buffer
if(printcs_ == NULL) {
const_cast<char*&>(printcs_) = new char[len_+1];
}
char* printcs = const_cast<char*>(printcs_);
assert(printcs != NULL);
for(size_t i = 0; i < len_; i++) {
assert_lt(cs_[i], (int)xformElts);
printcs[i] = xform[cs_[i]];
}
printcs[len_] = 0;
return printcs_;
}
/**
* Put a terminator in the 'len_'th element and then return a
* pointer to the buffer. Useful for printing.
*/
virtual const T* toZBuf() const {
const_cast<T*>(cs_)[len_] = 0;
return cs_;
}
/**
* Return a const version of the raw buffer.
*/
const T* buf() const { return cs_; }
/**
* Return a writeable version of the raw buffer.
*/
T* wbuf() { return cs_; }
protected:
T *cs_; // +1 so that we have the option of dropping in a terminating "\0"
char *printcs_; // +1 so that we have the option of dropping in a terminating "\0"
size_t len_; // # elements
};
/**
* Simple string class with backing memory whose size is managed by the user
* using the constructor and install() member function. No behind-the-scenes
* reallocation or copying takes place.
*/
class S2bDnaString {
public:
explicit S2bDnaString() :
cs_(NULL),
printcs_(NULL),
len_(0)
{ }
explicit S2bDnaString(size_t sz) :
cs_(NULL),
printcs_(NULL),
len_(0)
{
resize(sz);
}
/**
* Copy another object of the same class.
*/
S2bDnaString(const S2bDnaString& o) :
cs_(NULL),
printcs_(NULL),
len_(0)
{
*this = o;
}
/**
* Create an SStringExpandable from a std::basic_string of the
* appropriate type.
*/
explicit S2bDnaString(
const std::basic_string<char>& str,
bool chars = false,
bool colors = false) :
cs_(NULL),
printcs_(NULL),
len_(0)
{
if(chars) {
if(colors) {
installColors(str.c_str(), str.length());
} else {
installChars(str.c_str(), str.length());
}
} else {
install(str.c_str(), str.length());
}
}
/**
* Create an SStringExpandable from an array and size.
*/
explicit S2bDnaString(
const char* b,
size_t sz,
bool chars = false,
bool colors = false) :
cs_(NULL),
printcs_(NULL),
len_(0)
{
if(chars) {
if(colors) {
installColors(b, sz);
} else {
installChars(b, sz);
}
} else {
install(b, sz);
}
}
/**
* Create an SStringFixed from a zero-terminated string.
*/
explicit S2bDnaString(
const char* b,
bool chars = false,
bool colors = false) :
cs_(NULL),
printcs_(NULL),
len_(0)
{
if(chars) {
if(colors) {
installColors(b, strlen(b));
} else {
installChars(b, strlen(b));
}
} else {
install(b, strlen(b));
}
}
/**
* Destroy the expandable string object.
*/
virtual ~S2bDnaString() {
if(cs_ != NULL) {
delete[] cs_;
cs_ = NULL;
}
if(printcs_ != NULL) {
delete[] printcs_;
printcs_ = NULL;
}
len_ = 0;
}
/**
* Assignment to other SString.
*/
template<typename T>
S2bDnaString& operator=(const T& o) {
install(o.c_str(), o.length());
return *this;
}
/**
* Assignment from a std::basic_string
*/
template<typename T>
S2bDnaString& operator=(const std::basic_string<char>& o) {
install(o);
return *this;
}
/**
* Resizes the string without preserving its contents.
*/
void resize(size_t sz) {
if(cs_ != NULL) {
delete cs_;
cs_ = NULL;
}
if(printcs_ != NULL) {
delete printcs_;
printcs_ = NULL;
}
len_ = sz;
if(sz != 0) {
cs_ = new uint32_t[nwords()];
}
}
/**
* Return DNA character corresponding to element 'idx'.
*/
char toChar(size_t idx) const {
int c = (int)get(idx);
assert_range(0, 3, c);
return "ACGT"[c];
}
/**
* Return color character corresponding to element 'idx'.
*/
char toColor(size_t idx) const {
int c = (int)get(idx);
assert_range(0, 3, c);
return "0123"[c];
}
/**
* Return ith character from the left of either the forward or the
* reverse version of the read.
*/
char windowGet(
size_t i,
bool fw,
size_t depth = 0,
size_t len = 0) const
{
if(len == 0) len = len_;
assert_lt(i, len);
assert_leq(len, len_ - depth);
return fw ? get(depth+i) : get(depth+len-i-1);
}
/**
* Return ith character from the left of either the forward or the
* reverse-complement version of the read.
*/
template<typename T>
void windowGet(
T& ret,
bool fw,
size_t depth = 0,
size_t len = 0) const
{
if(len == 0) len = len_;
assert_leq(len, len_ - depth);
ret.resize(len);
for(size_t i = 0; i < len; i++) {
ret.set((fw ? get(depth+i) : get(depth+len-i-1)), i);
}
}
/**
* Return length in 32-bit words.
*/
size_t nwords() const {
return (len_ + 15) >> 4;
}
/**
* Set character at index 'idx' to 'c'.
*/
void set(int c, size_t idx) {
assert_lt(idx, len_);
assert_range(0, 3, c);
size_t word = idx >> 4;
size_t bpoff = (idx & 15) << 1;
cs_[word] = cs_[word] & ~(uint32_t)(3 << bpoff);
cs_[word] = cs_[word] | (uint32_t)(c << bpoff);
}
/**
* Set character at index 'idx' to DNA char 'c'.
*/
void setChar(int c, size_t idx) {
assert_in(toupper(c), "ACGT");
int bp = asc2dna[c];
set(bp, idx);
}
/**
* Set character at index 'idx' to color char 'c'.
*/
void setColor(int c, size_t idx) {
assert_in(toupper(c), "0123");
int co = asc2col[c];
set(co, idx);
}
/**
* Set the ith 32-bit word to given word.
*/
void setWord(uint32_t w, size_t i) {
assert_lt(i, nwords());
cs_[i] = w;
}
/**
* Retrieve constant version of element i.
*/
char operator[](size_t i) const {
assert_lt(i, len_);
return get(i);
}
/**
* Retrieve constant version of element i.
*/
char get(size_t i) const {
assert_lt(i, len_);
size_t word = i >> 4;
size_t bpoff = (i & 15) << 1;
return (char)((cs_[word] >> bpoff) & 3);
}
/**
* Copy packed words from string 'b' into this packed string.
*/
void install(const uint32_t* b, size_t sz) {
if(sz == 0) return;
resize(sz);
memcpy(cs_, b, sizeof(uint32_t)*nwords());
}
/**
* Copy 'sz' DNA characters encoded as integers from buffer 'b' into this
* packed string.
*/
void install(const char* b, size_t sz) {
if(sz == 0) return;
resize(sz);
size_t wordi = 0;
for(size_t i = 0; i < sz; i += 16) {
uint32_t word = 0;
for(int j = 0; j < 16 && (size_t)(i+j) < sz; j++) {
uint32_t bp = (int)b[i+j];
uint32_t shift = (uint32_t)j << 1;
assert_range(0, 3, (int)bp);
word |= (bp << shift);
}
cs_[wordi++] = word;
}
}
/**
* Copy 'sz' DNA characters from buffer 'b' into this packed string.
*/
void installChars(const char* b, size_t sz) {
if(sz == 0) return;
resize(sz);
size_t wordi = 0;
for(size_t i = 0; i < sz; i += 16) {
uint32_t word = 0;
for(int j = 0; j < 16 && (size_t)(i+j) < sz; j++) {
char c = b[i+j];
assert_in(toupper(c), "ACGT");
int bp = asc2dna[(int)c];
assert_range(0, 3, (int)bp);
uint32_t shift = (uint32_t)j << 1;
word |= (bp << shift);
}
cs_[wordi++] = word;
}
}
/**
* Copy 'sz' color characters from buffer 'b' into this packed string.
*/
void installColors(const char* b, size_t sz) {
if(sz == 0) return;
resize(sz);
size_t wordi = 0;
for(size_t i = 0; i < sz; i += 16) {
uint32_t word = 0;
for(int j = 0; j < 16 && (size_t)(i+j) < sz; j++) {
char c = b[i+j];
assert_in(c, "0123");
int bp = asc2col[(int)c];
assert_range(0, 3, (int)bp);
uint32_t shift = (uint32_t)j << 1;
word |= (bp << shift);
}
cs_[wordi++] = word;
}
}
/**
* Copy 'sz' DNA characters from buffer 'b' into this packed string.
*/
void install(const char* b) {
install(b, strlen(b));
}
/**
* Copy 'sz' DNA characters from buffer 'b' into this packed string.
*/
void installChars(const char* b) {
installChars(b, strlen(b));
}
/**
* Copy 'sz' DNA characters from buffer 'b' into this packed string.
*/
void installColors(const char* b) {
installColors(b, strlen(b));
}
/**
* Copy 'sz' DNA characters from buffer 'b' into this packed string.
*/
void install(const std::basic_string<char>& b) {
install(b.c_str(), b.length());
}
/**
* Copy 'sz' DNA characters from buffer 'b' into this packed string.
*/
void installChars(const std::basic_string<char>& b) {
installChars(b.c_str(), b.length());
}
/**
* Copy 'sz' DNA characters from buffer 'b' into this packed string.
*/
void installColors(const std::basic_string<char>& b) {
installColors(b.c_str(), b.length());
}
/**
* Copy 'sz' bytes from buffer 'b' into this string, reversing them
* in the process.
*/
void installReverse(const char* b, size_t sz) {
resize(sz);
if(sz == 0) return;
size_t wordi = 0;
size_t bpi = 0;
cs_[0] = 0;
for(size_t i =sz; i > 0; i--) {
assert_range(0, 3, (int)b[i-1]);
cs_[wordi] |= ((int)b[i-1] << (bpi<<1));
if(bpi == 15) {
wordi++;
cs_[wordi] = 0;
bpi = 0;
} else bpi++;
}
}
/**
* Copy all chars from buffer of DNA characters 'b' into this string,
* reversing them in the process.
*/
void installReverse(const char* b) {
installReverse(b, strlen(b));
}
/**
* Copy 'sz' bytes from buffer of DNA characters 'b' into this string,
* reversing them in the process.
*/
void installReverseChars(const char* b, size_t sz) {
resize(sz);
if(sz == 0) return;
size_t wordi = 0;
size_t bpi = 0;
cs_[0] = 0;
for(size_t i =sz; i > 0; i--) {
char c = b[i-1];
assert_in(toupper(c), "ACGT");
int bp = asc2dna[(int)c];
assert_range(0, 3, bp);
cs_[wordi] |= (bp << (bpi<<1));
if(bpi == 15) {
wordi++;
cs_[wordi] = 0;
bpi = 0;
} else bpi++;
}
}
/**
* Copy all chars from buffer of DNA characters 'b' into this string,
* reversing them in the process.
*/
void installReverseChars(const char* b) {
installReverseChars(b, strlen(b));
}
/**
* Copy 'sz' bytes from buffer of color characters 'b' into this string,
* reversing them in the process.
*/
void installReverseColors(const char* b, size_t sz) {
resize(sz);
if(sz == 0) return;
size_t wordi = 0;
size_t bpi = 0;
cs_[0] = 0;
for(size_t i =sz; i > 0; i--) {
char c = b[i-1];
assert_in(c, "0123");
int bp = asc2col[(int)c];
assert_range(0, 3, bp);
cs_[wordi] |= (bp << (bpi<<1));
if(bpi == 15) {
wordi++;
cs_[wordi] = 0;
bpi = 0;
} else bpi++;
}
}
/**
* Copy all chars from buffer of color characters 'b' into this string,
* reversing them in the process.
*/
void installReverseColors(const char* b) {
installReverseColors(b, strlen(b));
}
/**
* Copy 'sz' bytes from buffer 'b' into this string, reversing them
* in the process.
*/
void installReverse(const S2bDnaString& b) {
resize(b.len_);
if(b.len_ == 0) return;
size_t wordi = 0;
size_t bpi = 0;
size_t wordb = b.nwords()-1;
size_t bpb = (b.len_-1) & 15;
cs_[0] = 0;
for(size_t i = b.len_; i > 0; i--) {
int bbp = (int)((b[wordb] >> (bpb << 1)) & 3);
assert_range(0, 3, bbp);
cs_[wordi] |= (bbp << (bpi << 1));
if(bpi == 15) {
wordi++;
cs_[wordi] = 0;
bpi = 0;
} else bpi++;
if(bpb == 0) {
wordb--;
bpi = 15;
} else bpi--;
}
}
/**
* Return true iff the two strings are equal.
*/
bool operator==(const S2bDnaString& o) {
return sstr_eq(*this, o);
}
/**
* Return true iff the two strings are not equal.
*/
bool operator!=(const S2bDnaString& o) {
return sstr_neq(*this, o);
}
/**
* Return true iff this string is less than given string.
*/
bool operator<(const S2bDnaString& o) {
return sstr_lt(*this, o);
}
/**
* Return true iff this string is greater than given string.
*/
bool operator>(const S2bDnaString& o) {
return sstr_gt(*this, o);
}
/**
* Return true iff this string is less than or equal to given string.
*/
bool operator<=(const S2bDnaString& o) {
return sstr_leq(*this, o);
}
/**
* Return true iff this string is greater than or equal to given string.
*/
bool operator>=(const S2bDnaString& o) {
return sstr_geq(*this, o);
}
/**
* Reverse the 2-bit encoded DNA string in-place.
*/
void reverse() {
if(len_ <= 1) return;
size_t wordf = nwords()-1;
size_t bpf = (len_-1) & 15;
size_t wordi = 0;
size_t bpi = 0;
while(wordf > wordi || (wordf == wordi && bpf > bpi)) {
int f = (cs_[wordf] >> (bpf << 1)) & 3;
int i = (cs_[wordi] >> (bpi << 1)) & 3;
cs_[wordf] &= ~(uint32_t)(3 << (bpf << 1));
cs_[wordi] &= ~(uint32_t)(3 << (bpi << 1));
cs_[wordf] |= (uint32_t)(i << (bpf << 1));
cs_[wordi] |= (uint32_t)(f << (bpi << 1));
if(bpf == 0) {
bpf = 15;
wordf--;
} else bpf--;
if(bpi == 15) {
bpi = 0;
wordi++;
} else bpi++;
}
}
/**
* Reverse a substring of the buffer in place.
*/
void reverseWindow(size_t off, size_t len) {
assert_leq(off, len_);
assert_leq(off+len, len_);
if(len <= 1) return;
size_t wordf = (off+len-1) >> 4;
size_t bpf = (off+len-1) & 15;
size_t wordi = (off ) >> 4;
size_t bpi = (off ) & 15;
while(wordf > wordi || (wordf == wordi && bpf > bpi)) {
int f = (cs_[wordf] >> (bpf << 1)) & 3;
int i = (cs_[wordi] >> (bpi << 1)) & 3;
cs_[wordf] &= ~(uint32_t)(3 << (bpf << 1));
cs_[wordi] &= ~(uint32_t)(3 << (bpi << 1));
cs_[wordf] |= (uint32_t)(i << (bpf << 1));
cs_[wordi] |= (uint32_t)(f << (bpi << 1));
if(bpf == 0) {
bpf = 15;
wordf--;
} else bpf--;
if(bpi == 15) {
bpi = 0;
wordi++;
} else bpi++;
}
}
/**
* Set the first len elements of the buffer to el.
*/
void fill(size_t len, char el) {
assert_leq(len, len_);
assert_range(0, 3, (int)el);
size_t word = 0;
if(len > 32) {
// Copy el throughout block
uint32_t bl = (uint32_t)el;
bl |= (bl << 2);
bl |= (bl << 4);
bl |= (bl << 8);
bl |= (bl << 16);
// Fill with blocks
size_t blen = len >> 4;
for(; word < blen; word++) {
cs_[word] = bl;
}
len = len & 15;
}
size_t bp = 0;
for(size_t i = 0; i < len; i++) {
cs_[word] &= ~(uint32_t)(3 << (bp << 1));
cs_[word] |= (uint32_t)(el << (bp << 1));
if(bp == 15) {
word++;
bp = 0;
} else bp++;
}
}
/**
* Set all elements of the buffer to el.
*/
void fill(char el) {
fill(len_, el);
}
/**
* Return the ith character in the window defined by fw, color, depth and
* len.
*/
char windowGetDna(
size_t i,
bool fw,
size_t depth = 0,
size_t len = 0) const
{
if(len == 0) len = len_;
assert_lt(i, len);
assert_leq(len, len_ - depth);
if(fw) {
return get(depth+i);
} else {
return compDna(get(depth+len-i-1));
}
}
/**
* Fill the given DNA buffer with the substring specified by fw,
* color, depth and len.
*/
template<typename T>
void windowGetDna(
T& buf,
bool fw,
size_t depth = 0,
size_t len = 0) const
{
if(len == 0) len = len_;
assert_leq(len, len_ - depth);
buf.resize(len);
for(size_t i = 0; i < len; i++) {
buf.set(
(fw ?
get(depth+i) :
compDna(get(depth+len-i-1))), i);
}
}
/**
* Return the length of the string.
*/
inline size_t length() const { return len_; }
/**
* Clear the buffer.
*/
void clear() { len_ = 0; }
/**
* Return true iff the buffer is empty.
*/
inline bool empty() const { return len_ == 0; }
/**
* Return a const version of the raw buffer.
*/
const uint32_t* buf() const { return cs_; }
/**
* Return a writeable version of the raw buffer.
*/
uint32_t* wbuf() { return cs_; }
/**
* Note: the size of the string once it's stored in the print buffer is 4
* times as large as the string as stored in compact 2-bit-per-char words.
*/
const char* toZBuf() const {
if(printcs_ == NULL) {
const_cast<char*&>(printcs_) = new char[len_+1];
}
char *printcs = const_cast<char*>(printcs_);
size_t word = 0, bp = 0;
for(size_t i = 0; i < len_; i++) {
int c = (cs_[word] >> (bp << 1)) & 3;
printcs[i] = "ACGT"[c];
if(bp == 15) {
word++;
bp = 0;
} else bp++;
}
printcs[len_] = '\0';
return printcs_;
}
protected:
uint32_t *cs_; // 2-bit packed words
char *printcs_;
size_t len_; // # elements
};
/**
* Simple string class with backing memory that automatically expands as needed.
*/
template<typename T, int S = 1024, int M = 2, int I = 0>
class SStringExpandable {
public:
explicit SStringExpandable() :
cs_(NULL),
printcs_(NULL),
len_(0),
sz_(0)
{
if(I > 0) {
expandNoCopy(I);
}
}
explicit SStringExpandable(size_t sz) :
cs_(NULL),
printcs_(NULL),
len_(0),
sz_(0)
{
expandNoCopy(sz);
}
/**
* Create an SStringExpandable from another SStringExpandable.
*/
SStringExpandable(const SStringExpandable<T, S>& o) :
cs_(NULL),
printcs_(NULL),
len_(0),
sz_(0)
{
*this = o;
}
/**
* Create an SStringExpandable from a std::basic_string of the
* appropriate type.
*/
explicit SStringExpandable(const std::basic_string<T>& str) :
cs_(NULL),
printcs_(NULL),
len_(0),
sz_(0)
{
install(str.c_str(), str.length());
}
/**
* Create an SStringExpandable from an array and size.
*/
explicit SStringExpandable(const T* b, size_t sz) :
cs_(NULL),
printcs_(NULL),
len_(0),
sz_(0)
{
install(b, sz);
}
/**
* Create an SStringExpandable from a zero-terminated array.
*/
explicit SStringExpandable(const T* b) :
cs_(NULL),
printcs_(NULL),
len_(0),
sz_(0)
{
install(b, strlen(b));
}
/**
* Destroy the expandable string object.
*/
virtual ~SStringExpandable() {
if(cs_ != NULL) {
delete[] cs_;
cs_ = NULL;
}
if(printcs_ != NULL) {
delete[] printcs_;
printcs_ = NULL;
}
sz_ = len_ = 0;
}
/**
* Return ith character from the left of either the forward or the
* reverse-complement version of the read.
*/
T windowGet(
size_t i,
bool fw,
size_t depth = 0,
size_t len = 0) const
{
if(len == 0) len = len_;
assert_lt(i, len);
assert_leq(len, len_ - depth);
return fw ? cs_[depth+i] : cs_[depth+len-i-1];
}
/**
* Return ith character from the left of either the forward or the
* reverse-complement version of the read.
*/
void windowGet(
T& ret,
bool fw,
size_t depth = 0,
size_t len = 0) const
{
if(len == 0) len = len_;
assert_leq(len, len_ - depth);
for(size_t i = 0; i < len; i++) {
ret.append(fw ? cs_[depth+i] : cs_[depth+len-i-1]);
}
}
/**
* Assignment to other SStringExpandable.
*/
SStringExpandable<T,S,M,I>& operator=(const SStringExpandable<T,S,M,I>& o) {
install(o.cs_, o.len_);
return *this;
}
/**
* Assignment from a std::basic_string
*/
SStringExpandable<T,S>& operator=(const std::basic_string<T>& o) {
install(o.c_str(), o.length());
return *this;
}
/**
* Insert char c before position 'idx'; slide subsequent chars down.
*/
void insert(const T& c, size_t idx) {
assert_lt(idx, len_);
if(sz_ < len_ + 1) expandCopy((len_ + 1 + S) * M);
len_++;
// Move everyone down by 1
// len_ is the *new* length
for(size_t i = len_; i > idx+1; i--) {
cs_[i-1] = cs_[i-2];
}
cs_[idx] = c;
}
/**
* Set character at index 'idx' to 'c'.
*/
void set(int c, size_t idx) {
assert_lt(idx, len_);
cs_[idx] = c;
}
/**
* Append char c.
*/
void append(const T& c) {
if(sz_ < len_ + 1) expandCopy((len_ + 1 + S) * M);
cs_[len_++] = c;
}
/**
* Delete char at position 'idx'; slide subsequent chars up.
*/
void remove(size_t idx) {
assert_lt(idx, len_);
assert_gt(len_, 0);
for(size_t i = idx; i < len_-1; i++) {
cs_[i] = cs_[i+1];
}
len_--;
}
/**
* Retrieve constant version of element i.
*/
const T& operator[](size_t i) const {
assert_lt(i, len_);
return cs_[i];
}
/**
* Retrieve mutable version of element i.
*/
T& operator[](size_t i) {
assert_lt(i, len_);
return cs_[i];
}
/**
* Retrieve constant version of element i.
*/
const T& get(size_t i) const {
assert_lt(i, len_);
return cs_[i];
}
/**
* Copy 'sz' bytes from buffer 'b' into this string.
*/
virtual void install(const T* b, size_t sz) {
if(sz_ < sz) expandNoCopy((sz + S) * M);
memcpy(cs_, b, sz * sizeof(T));
len_ = sz;
}
/**
* Copy all bytes from zero-terminated buffer 'b' into this string.
*/
void install(const T* b) { install(b, strlen(b)); }
/**
* Copy 'sz' bytes from buffer 'b' into this string, reversing them
* in the process.
*/
void installReverse(const char* b, size_t sz) {
if(sz_ < sz) expandNoCopy((sz + S) * M);
for(size_t i = 0; i < sz; i++) {
cs_[i] = b[sz-i-1];
}
len_ = sz;
}
/**
* Copy 'sz' bytes from buffer 'b' into this string, reversing them
* in the process.
*/
void installReverse(const SStringExpandable<T, S>& b) {
if(sz_ < b.len_) expandNoCopy((b.len_ + S) * M);
for(size_t i = 0; i < b.len_; i++) {
cs_[i] = b.cs_[b.len_ - i - 1];
}
len_ = b.len_;
}
/**
* Return true iff the two strings are equal.
*/
bool operator==(const SStringExpandable<T, S>& o) {
return sstr_eq(*this, o);
}
/**
* Return true iff the two strings are not equal.
*/
bool operator!=(const SStringExpandable<T, S>& o) {
return sstr_neq(*this, o);
}
/**
* Return true iff this string is less than given string.
*/
bool operator<(const SStringExpandable<T, S>& o) {
return sstr_lt(*this, o);
}
/**
* Return true iff this string is greater than given string.
*/
bool operator>(const SStringExpandable<T, S>& o) {
return sstr_gt(*this, o);
}
/**
* Return true iff this string is less than or equal to given string.
*/
bool operator<=(const SStringExpandable<T, S>& o) {
return sstr_leq(*this, o);
}
/**
* Return true iff this string is greater than or equal to given string.
*/
bool operator>=(const SStringExpandable<T, S>& o) {
return sstr_geq(*this, o);
}
/**
* Reverse the buffer in place.
*/
void reverse() {
for(size_t i = 0; i < (len_ >> 1); i++) {
T tmp = get(i);
set(get(len_-i-1), i);
set(tmp, len_-i-1);
}
}
/**
* Reverse a substring of the buffer in place.
*/
void reverseWindow(size_t off, size_t len) {
assert_leq(off, len_);
assert_leq(off + len, len_);
size_t mid = len >> 1;
for(size_t i = 0; i < mid; i++) {
T tmp = get(off+i);
set(get(off+len-i-1), off+i);
set(tmp, off+len-i-1);
}
}
/**
* Simply resize the buffer. If the buffer is resized to be
* longer, the newly-added elements will contain garbage and should
* be initialized immediately.
*/
void resize(size_t len) {
if(sz_ < len) expandCopy((len + S) * M);
len_ = len;
}
/**
* Simply resize the buffer. If the buffer is resized to be
* longer, new elements will be initialized with 'el'.
*/
void resize(size_t len, const T& el) {
if(sz_ < len) expandCopy((len + S) * M);
if(len > len_) {
for(size_t i = len_; i < len; i++) {
cs_[i] = el;
}
}
len_ = len;
}
/**
* Set the first len elements of the buffer to el.
*/
void fill(size_t len, const T& el) {
assert_leq(len, len_);
for(size_t i = 0; i < len; i++) {
cs_[i] = el;
}
}
/**
* Set all elements of the buffer to el.
*/
void fill(const T& el) {
fill(len_, el);
}
/**
* Trim len characters from the beginning of the string.
*/
void trimBegin(size_t len) {
assert_leq(len, len_);
if(len == len_) {
len_ = 0; return;
}
for(size_t i = 0; i < len_-len; i++) {
cs_[i] = cs_[i+len];
}
len_ -= len;
}
/**
* Trim len characters from the end of the string.
*/
size_t trimEnd(size_t len) {
if(len >= len_) {
size_t ret = len_;
len_ = 0;
return ret;
}
len_ -= len;
return len;
}
/**
* Copy 'sz' bytes from buffer 'b' into this string.
*/
void append(const T* b, size_t sz) {
if(sz_ < len_ + sz) expandCopy((len_ + sz + S) * M);
memcpy(cs_ + len_, b, sz * sizeof(T));
len_ += sz;
}
/**
* Copy bytes from zero-terminated buffer 'b' into this string.
*/
void append(const T* b) {
append(b, strlen(b));
}
/**
* Return the length of the string.
*/
size_t length() const { return len_; }
/**
* Clear the buffer.
*/
void clear() { len_ = 0; }
/**
* Return true iff the buffer is empty.
*/
bool empty() const { return len_ == 0; }
/**
* Put a terminator in the 'len_'th element and then return a
* pointer to the buffer. Useful for printing.
*/
const char* toZBufXForm(const char *xform) const {
ASSERT_ONLY(size_t xformElts = strlen(xform));
if(empty()) {
const_cast<char&>(zero_) = 0;
return &zero_;
}
char* printcs = const_cast<char*>(printcs_);
// Lazily allocate space for print buffer
for(size_t i = 0; i < len_; i++) {
assert_lt(cs_[i], (int)xformElts);
printcs[i] = xform[(int)cs_[i]];
}
printcs[len_] = 0;
return printcs_;
}
/**
* Put a terminator in the 'len_'th element and then return a
* pointer to the buffer. Useful for printing.
*/
virtual const T* toZBuf() const {
if(empty()) {
const_cast<T&>(zeroT_) = 0;
return &zeroT_;
}
assert_leq(len_, sz_);
const_cast<T*>(cs_)[len_] = 0;
return cs_;
}
/**
* Return true iff this DNA string matches the given nucleotide
* character string.
*/
bool eq(const char *str) const {
const char *self = toZBuf();
return strcmp(str, self) == 0;
}
/**
* Return a const version of the raw buffer.
*/
const T* buf() const { return cs_; }
/**
* Return a writeable version of the raw buffer.
*/
T* wbuf() { return cs_; }
protected:
/**
* Allocate new, bigger buffer and copy old contents into it. If
* requested size can be accommodated by current buffer, do nothing.
*/
void expandCopy(size_t sz) {
if(sz_ >= sz) return; // done!
T *tmp = new T[sz + 1];
char *ptmp = new char[sz + 1];
if(cs_ != NULL) {
memcpy(tmp, cs_, sizeof(T)*len_);
delete[] cs_;
}
if(printcs_ != NULL) {
memcpy(ptmp, printcs_, sizeof(char)*len_);
delete[] printcs_;
}
cs_ = tmp;
printcs_ = ptmp;
sz_ = sz;
}
/**
* Allocate new, bigger buffer. If requested size can be
* accommodated by current buffer, do nothing.
*/
void expandNoCopy(size_t sz) {
if(sz_ >= sz) return; // done!
if(cs_ != NULL) delete[] cs_;
if(printcs_ != NULL) delete[] printcs_;
cs_ = new T[sz + 1];
printcs_ = new char[sz + 1];
sz_ = sz;
}
T *cs_; // +1 so that we have the option of dropping in a terminating "\0"
char *printcs_; // +1 so that we have the option of dropping in a terminating "\0"
char zero_; // 0 terminator for empty string
T zeroT_; // 0 terminator for empty string
size_t len_; // # filled-in elements
size_t sz_; // size capacity of cs_
};
/**
* Simple string class with in-object storage.
*
* All copies induced by, e.g., operator=, the copy constructor,
* install() and append(), are shallow (using memcpy/sizeof). If deep
* copies are needed, use a different class.
*
* Reading from an uninitialized element results in an assert as long
* as NDEBUG is not defined. If NDEBUG is defined, the result is
* undefined.
*/
template<typename T, int S>
class SStringFixed {
public:
explicit SStringFixed() : len_(0) { }
/**
* Create an SStringFixed from another SStringFixed.
*/
SStringFixed(const SStringFixed<T, S>& o) {
*this = o;
}
/**
* Create an SStringFixed from another SStringFixed.
*/
explicit SStringFixed(const std::basic_string<T>& str) {
install(str.c_str(), str.length());
}
/**
* Create an SStringFixed from an array and size.
*/
explicit SStringFixed(const T* b, size_t sz) {
install(b, sz);
}
/**
* Create an SStringFixed from a zero-terminated string.
*/
explicit SStringFixed(const T* b) {
install(b, strlen(b));
}
virtual ~SStringFixed() { } // C++ needs this
/**
* Retrieve constant version of element i.
*/
inline const T& operator[](size_t i) const {
return get(i);
}
/**
* Retrieve mutable version of element i.
*/
inline T& operator[](size_t i) {
return get(i);
}
/**
* Retrieve constant version of element i.
*/
inline const T& get(size_t i) const {
assert_lt(i, len_);
return cs_[i];
}
/**
* Retrieve mutable version of element i.
*/
inline T& get(size_t i) {
assert_lt(i, len_);
return cs_[i];
}
/**
* Return ith character from the left of either the forward or the
* reverse-complement version of the read.
*/
T windowGet(
size_t i,
bool fw,
size_t depth = 0,
size_t len = 0) const
{
if(len == 0) len = len_;
assert_lt(i, len);
assert_leq(len, len_ - depth);
return fw ? cs_[depth+i] : cs_[depth+len-i-1];
}
/**
* Return ith character from the left of either the forward or the
* reverse-complement version of the read.
*/
void windowGet(
T& ret,
bool fw,
size_t depth = 0,
size_t len = 0) const
{
if(len == 0) len = len_;
assert_leq(len, len_ - depth);
for(size_t i = 0; i < len; i++) {
ret.append(fw ? cs_[depth+i] : cs_[depth+len-i-1]);
}
}
/**
* Assignment to other SStringFixed.
*/
SStringFixed<T,S>& operator=(const SStringFixed<T,S>& o) {
install(o.cs_, o.len_);
return *this;
}
/**
* Assignment from a std::basic_string
*/
SStringFixed<T,S>& operator=(const std::basic_string<T>& o) {
install(o);
return *this;
}
/**
* Insert char c before position 'idx'; slide subsequent chars down.
*/
void insert(const T& c, size_t idx) {
assert_lt(len_, S);
assert_lt(idx, len_);
// Move everyone down by 1
for(int i = len_; i > idx; i--) {
cs_[i] = cs_[i-1];
}
cs_[idx] = c;
len_++;
}
/**
* Set character at index 'idx' to 'c'.
*/
void set(int c, size_t idx) {
assert_lt(idx, len_);
cs_[idx] = c;
}
/**
* Append char c.
*/
void append(const T& c) {
assert_lt(len_, S);
cs_[len_++] = c;
}
/**
* Delete char at position 'idx'; slide subsequent chars up.
*/
void remove(size_t idx) {
assert_lt(idx, len_);
assert_gt(len_, 0);
for(size_t i = idx; i < len_-1; i++) {
cs_[i] = cs_[i+1];
}
len_--;
}
/**
* Copy 'sz' bytes from buffer 'b' into this string.
*/
virtual void install(const T* b, size_t sz) {
assert_leq(sz, S);
memcpy(cs_, b, sz * sizeof(T));
len_ = sz;
}
/**
* Copy all bytes from zero-terminated buffer 'b' into this string.
*/
void install(const T* b) { install(b, strlen(b)); }
/**
* Copy 'sz' bytes from buffer 'b' into this string, reversing them
* in the process.
*/
void installReverse(const char* b, size_t sz) {
assert_leq(sz, S);
for(size_t i = 0; i < sz; i++) {
cs_[i] = b[sz-i-1];
}
len_ = sz;
}
/**
* Copy 'sz' bytes from buffer 'b' into this string, reversing them
* in the process.
*/
void installReverse(const SStringFixed<T, S>& b) {
assert_leq(b.len_, S);
for(size_t i = 0; i < b.len_; i++) {
cs_[i] = b.cs_[b.len_ - i - 1];
}
len_ = b.len_;
}
/**
* Return true iff the two strings are equal.
*/
bool operator==(const SStringFixed<T, S>& o) {
return sstr_eq(*this, o);
}
/**
* Return true iff the two strings are not equal.
*/
bool operator!=(const SStringFixed<T, S>& o) {
return sstr_neq(*this, o);
}
/**
* Return true iff this string is less than given string.
*/
bool operator<(const SStringFixed<T, S>& o) {
return sstr_lt(*this, o);
}
/**
* Return true iff this string is greater than given string.
*/
bool operator>(const SStringFixed<T, S>& o) {
return sstr_gt(*this, o);
}
/**
* Return true iff this string is less than or equal to given string.
*/
bool operator<=(const SStringFixed<T, S>& o) {
return sstr_leq(*this, o);
}
/**
* Return true iff this string is greater than or equal to given string.
*/
bool operator>=(const SStringFixed<T, S>& o) {
return sstr_geq(*this, o);
}
/**
* Reverse the buffer in place.
*/
void reverse() {
for(size_t i = 0; i < (len_ >> 1); i++) {
T tmp = get(i);
set(get(len_-i-1), i);
set(tmp, len_-i-1);
}
}
/**
* Reverse a substring of the buffer in place.
*/
void reverseWindow(size_t off, size_t len) {
assert_leq(off, len_);
assert_leq(off + len, len_);
size_t mid = len >> 1;
for(size_t i = 0; i < mid; i++) {
T tmp = get(off+i);
set(get(off+len-i-1), off+i);
set(tmp, off+len-i-1);
}
}
/**
* Simply resize the buffer. If the buffer is resized to be
* longer, the newly-added elements will contain garbage and should
* be initialized immediately.
*/
void resize(size_t len) {
assert_lt(len, S);
len_ = len;
}
/**
* Simply resize the buffer. If the buffer is resized to be
* longer, new elements will be initialized with 'el'.
*/
void resize(size_t len, const T& el) {
assert_lt(len, S);
if(len > len_) {
for(size_t i = len_; i < len; i++) {
cs_[i] = el;
}
}
len_ = len;
}
/**
* Set the first len elements of the buffer to el.
*/
void fill(size_t len, const T& el) {
assert_leq(len, len_);
for(size_t i = 0; i < len; i++) {
cs_[i] = el;
}
}
/**
* Set all elements of the buffer to el.
*/
void fill(const T& el) {
fill(len_, el);
}
/**
* Trim len characters from the beginning of the string.
*/
void trimBegin(size_t len) {
assert_leq(len, len_);
if(len == len_) {
len_ = 0; return;
}
for(size_t i = 0; i < len_-len; i++) {
cs_[i] = cs_[i+len];
}
len_ -= len;
}
/**
* Trim len characters from the end of the string.
*/
void trimEnd(size_t len) {
if(len >= len_) len_ = 0;
else len_ -= len;
}
/**
* Copy 'sz' bytes from buffer 'b' into this string.
*/
void append(const T* b, size_t sz) {
assert_leq(sz + len_, S);
memcpy(cs_ + len_, b, sz * sizeof(T));
len_ += sz;
}
/**
* Copy bytes from zero-terminated buffer 'b' into this string.
*/
void append(const T* b) {
append(b, strlen(b));
}
/**
* Return the length of the string.
*/
size_t length() const { return len_; }
/**
* Clear the buffer.
*/
void clear() { len_ = 0; }
/**
* Return true iff the buffer is empty.
*/
bool empty() const { return len_ == 0; }
/**
* Put a terminator in the 'len_'th element and then return a
* pointer to the buffer. Useful for printing.
*/
virtual const T* toZBuf() const {
const_cast<T*>(cs_)[len_] = 0;
return cs_;
}
/**
* Return true iff this DNA string matches the given nucleotide
* character string.
*/
bool eq(const char *str) const {
const char *self = toZBuf();
return strcmp(str, self) == 0;
}
/**
* Put a terminator in the 'len_'th element and then return a
* pointer to the buffer. Useful for printing.
*/
const char* toZBufXForm(const char *xform) const {
ASSERT_ONLY(size_t xformElts = strlen(xform));
char* printcs = const_cast<char*>(printcs_);
for(size_t i = 0; i < len_; i++) {
assert_lt(cs_[i], (int)xformElts);
printcs[i] = xform[cs_[i]];
}
printcs[len_] = 0;
return printcs_;
}
/**
* Return a const version of the raw buffer.
*/
const T* buf() const { return cs_; }
/**
* Return a writeable version of the raw buffer.
*/
T* wbuf() { return cs_; }
protected:
T cs_[S+1]; // +1 so that we have the option of dropping in a terminating "\0"
char printcs_[S+1]; // +1 so that we have the option of dropping in a terminating "\0"
size_t len_;
};
//
// Stream put operators
//
template <typename T, int S, int M>
std::ostream& operator<< (std::ostream& os, const SStringExpandable<T, S, M>& str) {
os << str.toZBuf();
return os;
}
template <typename T, int S>
std::ostream& operator<< (std::ostream& os, const SStringFixed<T, S>& str) {
os << str.toZBuf();
return os;
}
extern uint8_t asc2dna[];
extern uint8_t asc2col[];
/**
* Encapsulates a fixed-length DNA string with characters encoded as
* chars. Only capable of encoding A, C, G, T and N. The length is
* specified via the template parameter S.
*/
template<int S>
class SDnaStringFixed : public SStringFixed<char, S> {
public:
explicit SDnaStringFixed() : SStringFixed<char, S>() { }
/**
* Create an SStringFixed from another SStringFixed.
*/
SDnaStringFixed(const SDnaStringFixed<S>& o) :
SStringFixed<char, S>(o) { }
/**
* Create an SStringFixed from a C++ basic_string.
*/
explicit SDnaStringFixed(const std::basic_string<char>& str) :
SStringFixed<char, S>(str) { }
/**
* Create an SStringFixed from an array and size.
*/
explicit SDnaStringFixed(const char* b, size_t sz) :
SStringFixed<char, S>(b, sz) { }
/**
* Create an SStringFixed from a zero-terminated string.
*/
explicit SDnaStringFixed(
const char* b,
bool chars = false,
bool colors = false) :
SStringFixed<char, S>()
{
if(chars) {
if(colors) {
installColors(b, strlen(b));
} else {
installChars(b, strlen(b));
}
} else {
install(b, strlen(b));
}
}
virtual ~SDnaStringFixed() { } // C++ needs this
/**
* Copy 'sz' bytes from buffer 'b' into this string, reverse-
* complementing them in the process, assuming an encoding where
* 0=A, 1=C, 2=G, 3=T, 4=N.
*/
void installReverseComp(const char* b, size_t sz) {
assert_leq(sz, S);
for(size_t i = 0; i < sz; i++) {
this->cs_[i] = (b[sz-i-1] == 4 ? 4 : b[sz-i-1] ^ 3);
}
this->len_ = sz;
}
/**
* Copy 'sz' bytes from buffer 'b' into this string, reverse-
* complementing them in the process, assuming an encoding where
* 0=A, 1=C, 2=G, 3=T, 4=N.
*/
void installReverseComp(const SDnaStringFixed<S>& b) {
assert_leq(b.len_, S);
for(size_t i = 0; i < b.len_; i++) {
this->cs_[i] = (b.cs_[b.len_-i-1] == 4 ? 4 : b.cs_[b.len_-i-1] ^ 3);
}
this->len_ = b.len_;
}
/**
* Either reverse or reverse-complement (depending on "color") this
* DNA buffer in-place.
*/
void reverseComp() {
for(size_t i = 0; i < (this->len_ >> 1); i++) {
char tmp1 = (this->cs_[i] == 4 ? 4 : this->cs_[i] ^ 3);
char tmp2 = (this->cs_[this->len_-i-1] == 4 ? 4 : this->cs_[this->len_-i-1] ^ 3);
this->cs_[i] = tmp2;
this->cs_[this->len_-i-1] = tmp1;
}
// Do middle element iff there are an odd number
if((this->len_ & 1) != 0) {
char tmp = this->cs_[this->len_ >> 1];
tmp = (tmp == 4 ? 4 : tmp ^ 3);
this->cs_[this->len_ >> 1] = tmp;
}
}
/**
* Copy 'sz' bytes from buffer 'b' into this string.
*/
virtual void install(const char* b, size_t sz) {
assert_leq(sz, S);
memcpy(this->cs_, b, sz);
#ifndef NDEBUG
for(size_t i = 0; i < sz; i++) {
assert_leq(this->cs_[i], 4);
assert_geq(this->cs_[i], 0);
}
#endif
this->len_ = sz;
}
/**
* Copy buffer 'b' of ASCII DNA characters into normal DNA
* characters.
*/
virtual void installChars(const char* b, size_t sz) {
assert_leq(sz, S);
for(size_t i = 0; i < sz; i++) {
assert_in(toupper(b[i]), "ACGTN-");
this->cs_[i] = asc2dna[(int)b[i]];
assert_geq(this->cs_[i], 0);
assert_leq(this->cs_[i], 4);
}
this->len_ = sz;
}
/**
* Copy buffer 'b' of ASCII color characters into normal DNA
* characters.
*/
virtual void installColors(const char* b, size_t sz) {
assert_leq(sz, S);
for(size_t i = 0; i < sz; i++) {
assert_in(b[i], "0123.");
this->cs_[i] = asc2col[(int)b[i]];
assert_geq(this->cs_[i], 0);
assert_leq(this->cs_[i], 4);
}
this->len_ = sz;
}
/**
* Copy C++ string of ASCII DNA characters into normal DNA
* characters.
*/
virtual void installChars(const std::basic_string<char>& str) {
installChars(str.c_str(), str.length());
}
/**
* Copy C++ string of ASCII color characters into normal DNA
* characters.
*/
virtual void installColors(const std::basic_string<char>& str) {
installColors(str.c_str(), str.length());
}
/**
* Set DNA character at index 'idx' to 'c'.
*/
void set(int c, size_t idx) {
assert_lt(idx, this->len_);
assert_leq(c, 4);
assert_geq(c, 0);
this->cs_[idx] = c;
}
/**
* Append DNA char c.
*/
void append(const char& c) {
assert_lt(this->len_, S);
assert_leq(c, 4);
assert_geq(c, 0);
this->cs_[this->len_++] = c;
}
/**
* Set DNA character at index 'idx' to 'c'.
*/
void setChar(char c, size_t idx) {
assert_lt(idx, this->len_);
assert_in(toupper(c), "ACGTN");
this->cs_[idx] = asc2dna[(int)c];
}
/**
* Append DNA character.
*/
void appendChar(char c) {
assert_lt(this->len_, S);
assert_in(toupper(c), "ACGTN");
this->cs_[this->len_++] = asc2dna[(int)c];
}
/**
* Return DNA character corresponding to element 'idx'.
*/
char toChar(size_t idx) const {
assert_geq((int)this->cs_[idx], 0);
assert_leq((int)this->cs_[idx], 4);
return "ACGTN"[(int)this->cs_[idx]];
}
/**
* Retrieve constant version of element i.
*/
const char& operator[](size_t i) const {
return this->get(i);
}
/**
* Retrieve constant version of element i.
*/
const char& get(size_t i) const {
assert_lt(i, this->len_);
assert_leq(this->cs_[i], 4);
assert_geq(this->cs_[i], 0);
return this->cs_[i];
}
/**
* Return the ith character in the window defined by fw, color,
* depth and len.
*/
char windowGetDna(
size_t i,
bool fw,
size_t depth = 0,
size_t len = 0) const
{
if(len == 0) len = this->len_;
assert_lt(i, len);
assert_leq(len, this->len_ - depth);
if(fw) return this->cs_[depth+i];
else return compDna(this->cs_[depth+len-i-1]);
}
/**
* Fill the given DNA buffer with the substring specified by fw,
* color, depth and len.
*/
void windowGetDna(
SDnaStringFixed<S>& buf,
bool fw,
size_t depth = 0,
size_t len = 0) const
{
if(len == 0) len = this->len_;
assert_leq(len, this->len_ - depth);
for(size_t i = 0; i < len; i++) {
buf.append(fw ? this->cs_[depth+i] :
compDna(this->cs_[depth+len-i-1]));
}
}
/**
* Put a terminator in the 'len_'th element and then return a
* pointer to the buffer. Useful for printing.
*/
virtual const char* toZBuf() const { return this->toZBufXForm("ACGTN"); }
};
/**
* Encapsulates a fixed-length DNA string with characters encoded as
* chars. Only capable of encoding A, C, G, T and N. The length is
* specified via the template parameter S.
*/
template<int S = 1024, int M = 2>
class SDnaStringExpandable : public SStringExpandable<char, S, M> {
public:
explicit SDnaStringExpandable() : SStringExpandable<char, S, M>() { }
/**
* Create an SStringFixed from another SStringFixed.
*/
SDnaStringExpandable(const SDnaStringExpandable<S, M>& o) :
SStringExpandable<char, S, M>(o) { }
/**
* Create an SStringFixed from a C++ basic_string.
*/
explicit SDnaStringExpandable(
const std::basic_string<char>& str,
bool chars = false,
bool colors = false) :
SStringExpandable<char, S, M>()
{
if(chars) {
if(colors) {
installColors(str);
} else {
installChars(str);
}
} else {
install(str);
}
}
/**
* Create an SStringFixed from an array and size.
*/
explicit SDnaStringExpandable(
const char* b,
size_t sz,
bool chars = false,
bool colors = false) :
SStringExpandable<char, S, M>()
{
if(chars) {
if(colors) {
installColors(b, sz);
} else {
installChars(b, sz);
}
} else {
install(b, sz);
}
}
/**
* Create an SStringFixed from a zero-terminated string.
*/
explicit SDnaStringExpandable(
const char* b,
bool chars = false,
bool colors = false) :
SStringExpandable<char, S, M>()
{
install(b, chars, colors);
}
virtual ~SDnaStringExpandable() { } // C++ needs this
/**
* Copy 'sz' bytes from buffer 'b' into this string, reverse-
* complementing them in the process, assuming an encoding where
* 0=A, 1=C, 2=G, 3=T, 4=N.
*/
void installReverseComp(const char* b, size_t sz) {
if(this->sz_ < sz) this->expandCopy((sz + S) * M);
for(size_t i = 0; i < sz; i++) {
this->cs_[i] = (b[sz-i-1] == 4 ? 4 : b[sz-i-1] ^ 3);
}
this->len_ = sz;
}
/**
* Copy 'sz' bytes from buffer 'b' into this string, reverse-
* complementing them in the process, assuming an encoding where
* 0=A, 1=C, 2=G, 3=T, 4=N.
*/
void installReverseComp(const SDnaStringExpandable<S, M>& b) {
if(this->sz_ < b.len_) this->expandCopy((b.len_ + S) * M);
for(size_t i = 0; i < b.len_; i++) {
this->cs_[i] = (b.cs_[b.len_-i-1] == 4 ? 4 : b.cs_[b.len_-i-1] ^ 3);
}
this->len_ = b.len_;
}
/**
* Either reverse or reverse-complement (depending on "color") this
* DNA buffer in-place.
*/
void reverseComp() {
for(size_t i = 0; i < (this->len_ >> 1); i++) {
char tmp1 = (this->cs_[i] == 4 ? 4 : this->cs_[i] ^ 3);
char tmp2 = (this->cs_[this->len_-i-1] == 4 ? 4 : this->cs_[this->len_-i-1] ^ 3);
this->cs_[i] = tmp2;
this->cs_[this->len_-i-1] = tmp1;
}
// Do middle element iff there are an odd number
if((this->len_ & 1) != 0) {
char tmp = this->cs_[this->len_ >> 1];
tmp = (tmp == 4 ? 4 : tmp ^ 3);
this->cs_[this->len_ >> 1] = tmp;
}
}
/**
* Copy 'sz' bytes from buffer 'b' into this string.
*/
virtual void install(
const char* b,
bool chars = false,
bool colors = false)
{
if(chars) {
if(colors) {
installColors(b, strlen(b));
} else {
installChars(b, strlen(b));
}
} else {
install(b, strlen(b));
}
}
/**
* Copy 'sz' bytes from buffer 'b' into this string.
*/
virtual void install(const char* b, size_t sz) {
if(this->sz_ < sz) this->expandCopy((sz + S) * M);
memcpy(this->cs_, b, sz);
#ifndef NDEBUG
for(size_t i = 0; i < sz; i++) {
assert_range(0, 4, (int)this->cs_[i]);
}
#endif
this->len_ = sz;
}
/**
* Copy buffer 'b' of ASCII DNA characters into normal DNA
* characters.
*/
virtual void installChars(const char* b, size_t sz) {
if(this->sz_ < sz) this->expandCopy((sz + S) * M);
for(size_t i = 0; i < sz; i++) {
assert_in(toupper(b[i]), "ACGTN-");
this->cs_[i] = asc2dna[(int)b[i]];
assert_range(0, 4, (int)this->cs_[i]);
}
this->len_ = sz;
}
/**
* Copy buffer 'b' of ASCII color characters into normal DNA
* characters.
*/
virtual void installColors(const char* b, size_t sz) {
if(this->sz_ < sz) this->expandCopy((sz + S) * M);
for(size_t i = 0; i < sz; i++) {
assert_in(b[i], "0123.");
this->cs_[i] = asc2col[(int)b[i]];
assert_range(0, 4, (int)this->cs_[i]);
}
this->len_ = sz;
}
/**
* Copy C++ string of ASCII DNA characters into normal DNA
* characters.
*/
virtual void installChars(const std::basic_string<char>& str) {
installChars(str.c_str(), str.length());
}
/**
* Copy C++ string of ASCII color characters into normal DNA
* characters.
*/
virtual void installColors(const std::basic_string<char>& str) {
installColors(str.c_str(), str.length());
}
/**
* Set DNA character at index 'idx' to 'c'.
*/
void set(int c, size_t idx) {
assert_lt(idx, this->len_);
assert_range(0, 4, c);
this->cs_[idx] = c;
}
/**
* Append DNA char c.
*/
void append(const char& c) {
if(this->sz_ < this->len_ + 1) {
this->expandCopy((this->len_ + 1 + S) * M);
}
assert_range(0, 4, (int)c);
this->cs_[this->len_++] = c;
}
/**
* Set DNA character at index 'idx' to 'c'.
*/
void setChar(char c, size_t idx) {
assert_lt(idx, this->len_);
assert_in(toupper(c), "ACGTN");
this->cs_[idx] = asc2dna[(int)c];
}
/**
* Append DNA character.
*/
void appendChar(char c) {
if(this->sz_ < this->len_ + 1) {
this->expandCopy((this->len_ + 1 + S) * M);
}
assert_in(toupper(c), "ACGTN");
this->cs_[this->len_++] = asc2dna[(int)c];
}
/**
* Return DNA character corresponding to element 'idx'.
*/
char toChar(size_t idx) const {
assert_range(0, 4, (int)this->cs_[idx]);
return "ACGTN"[(int)this->cs_[idx]];
}
/**
* Retrieve constant version of element i.
*/
inline const char& operator[](size_t i) const {
return this->get(i);
}
/**
* Retrieve constant version of element i.
*/
inline const char& get(size_t i) const {
assert_lt(i, this->len_);
assert_range(0, 4, (int)this->cs_[i]);
return this->cs_[i];
}
/**
* Return the ith character in the window defined by fw, color,
* depth and len.
*/
char windowGetDna(
size_t i,
bool fw,
size_t depth = 0,
size_t len = 0) const
{
if(len == 0) len = this->len_;
assert_lt(i, len);
assert_leq(len, this->len_ - depth);
if(fw) return this->cs_[depth+i];
else return compDna(this->cs_[depth+len-i-1]);
}
/**
* Fill the given DNA buffer with the substring specified by fw,
* color, depth and len.
*/
void windowGetDna(
SDnaStringExpandable<S, M>& buf,
bool fw,
size_t depth = 0,
size_t len = 0) const
{
if(len == 0) len = this->len_;
assert_leq(len, this->len_ - depth);
for(size_t i = 0; i < len; i++) {
buf.append(fw ? this->cs_[depth+i] :
compDna(this->cs_[depth+len-i-1]));
}
}
/**
* Put a terminator in the 'len_'th element and then return a
* pointer to the buffer. Useful for printing.
*/
virtual const char* toZBuf() const { return this->toZBufXForm("ACGTN"); }
};
/**
* Encapsulates an expandable DNA string with characters encoded as
* char-sized masks. Encodes A, C, G, T, and all IUPAC, as well as the
* empty mask indicating "matches nothing."
*/
template<int S = 16, int M = 2>
class SDnaMaskString : public SStringExpandable<char, S, M> {
public:
explicit SDnaMaskString() : SStringExpandable<char, S, M>() { }
/**
* Create an SStringFixed from another SStringFixed.
*/
SDnaMaskString(const SDnaMaskString<S, M>& o) :
SStringExpandable<char, S, M>(o) { }
/**
* Create an SStringFixed from a C++ basic_string.
*/
explicit SDnaMaskString(const std::basic_string<char>& str) :
SStringExpandable<char, S, M>(str) { }
/**
* Create an SStringFixed from an array and size.
*/
explicit SDnaMaskString(const char* b, size_t sz) :
SStringExpandable<char, S, M>(b, sz) { }
/**
* Create an SStringFixed from a zero-terminated string.
*/
explicit SDnaMaskString(const char* b, bool chars = false) :
SStringExpandable<char, S, M>()
{
if(chars) {
installChars(b, strlen(b));
} else {
install(b, strlen(b));
}
}
virtual ~SDnaMaskString() { } // C++ needs this
/**
* Copy 'sz' bytes from buffer 'b' into this string, reverse-
* complementing them in the process, assuming an encoding where
* 0=A, 1=C, 2=G, 3=T, 4=N.
*/
void installReverseComp(const char* b, size_t sz) {
while(this->sz_ < sz) {
this->expandNoCopy((sz + S) * M);
}
for(size_t i = 0; i < sz; i++) {
this->cs_[i] = maskcomp[(int)b[sz-i-1]];
}
this->len_ = sz;
}
/**
* Copy 'sz' bytes from buffer 'b' into this string, reverse-
* complementing them in the process, assuming an encoding where
* 0=A, 1=C, 2=G, 3=T, 4=N.
*/
void installReverseComp(const SDnaMaskString<S, M>& b) {
while(this->sz_ < b.len_) {
this->expandNoCopy((b.len_ + S) * M);
}
for(size_t i = 0; i < b.len_; i++) {
this->cs_[i] = maskcomp[(int)b.cs_[b.len_-i-1]];
}
this->len_ = b.len_;
}
/**
* Either reverse or reverse-complement (depending on "color") this
* DNA buffer in-place.
*/
void reverseComp() {
for(size_t i = 0; i < (this->len_ >> 1); i++) {
char tmp1 = maskcomp[(int)this->cs_[i]];
char tmp2 = maskcomp[(int)this->cs_[this->len_-i-1]];
this->cs_[i] = tmp2;
this->cs_[this->len_-i-1] = tmp1;
}
// Do middle element iff there are an odd number
if((this->len_ & 1) != 0) {
char tmp = this->cs_[this->len_ >> 1];
tmp = maskcomp[(int)tmp];
this->cs_[this->len_ >> 1] = tmp;
}
}
/**
* Copy 'sz' bytes from buffer 'b' into this string.
*/
virtual void install(const char* b, size_t sz) {
while(this->sz_ < sz) {
this->expandNoCopy((sz + S) * M);
}
memcpy(this->cs_, b, sz);
#ifndef NDEBUG
for(size_t i = 0; i < sz; i++) {
assert_range((int)this->cs_[i], 0, 15);
}
#endif
this->len_ = sz;
}
/**
* Copy buffer 'b' of ASCII DNA characters into DNA masks.
*/
virtual void installChars(const char* b, size_t sz) {
while(this->sz_ < sz) {
this->expandNoCopy((sz + S) * M);
}
for(size_t i = 0; i < sz; i++) {
assert_in(b[i], iupacs);
this->cs_[i] = asc2dnamask[(int)b[i]];
assert_range((int)this->cs_[i], 0, 15);
}
this->len_ = sz;
}
/**
* Copy C++ string of ASCII DNA characters into normal DNA
* characters.
*/
virtual void installChars(const std::basic_string<char>& str) {
installChars(str.c_str(), str.length());
}
/**
* Set DNA character at index 'idx' to 'c'.
*/
void set(int c, size_t idx) {
assert_lt(idx, this->len_);
assert_range(c, 0, 15);
this->cs_[idx] = c;
}
/**
* Append DNA char c.
*/
void append(const char& c) {
while(this->sz_ < this->len_+1) {
this->expandNoCopy((this->len_ + 1 + S) * M);
}
assert_range((int)c, 0, 15);
this->cs_[this->len_++] = c;
}
/**
* Set DNA character at index 'idx' to 'c'.
*/
void setChar(char c, size_t idx) {
assert_lt(idx, this->len_);
assert_in(toupper(c), iupacs);
this->cs_[idx] = asc2dnamask[(int)c];
}
/**
* Append DNA character.
*/
void appendChar(char c) {
while(this->sz_ < this->len_+1) {
expandNoCopy((this->len_ + 1 + S) * M);
}
assert_in(toupper(c), iupacs);
this->cs_[this->len_++] = asc2dnamask[(int)c];
}
/**
* Return DNA character corresponding to element 'idx'.
*/
char toChar(size_t idx) const {
assert_range((int)this->cs_[idx], 0, 15);
return mask2iupac[(int)this->cs_[idx]];
}
/**
* Retrieve constant version of element i.
*/
const char& operator[](size_t i) const {
return this->get(i);
}
/**
* Retrieve mutable version of element i.
*/
char& operator[](size_t i) {
return this->get(i);
}
/**
* Retrieve constant version of element i.
*/
const char& get(size_t i) const {
assert_lt(i, this->len_);
assert_range((int)this->cs_[i], 0, 15);
return this->cs_[i];
}
/**
* Retrieve mutable version of element i.
*/
char& get(size_t i) {
assert_lt(i, this->len_);
assert_range((int)this->cs_[i], 0, 15);
return this->cs_[i];
}
/**
* Return the ith character in the window defined by fw, color,
* depth and len.
*/
char windowGetDna(
size_t i,
bool fw,
size_t depth = 0,
size_t len = 0) const
{
if(len == 0) len = this->len_;
assert_lt(i, len);
assert_leq(len, this->len_ - depth);
if(fw) return this->cs_[depth+i];
else return maskcomp[this->cs_[depth+len-i-1]];
}
/**
* Fill the given DNA buffer with the substring specified by fw,
* color, depth and len.
*/
void windowGetDna(
SDnaStringFixed<S>& buf,
bool fw,
size_t depth = 0,
size_t len = 0) const
{
if(len == 0) len = this->len_;
assert_leq(len, this->len_ - depth);
for(size_t i = 0; i < len; i++) {
buf.append(fw ? this->cs_[depth+i] :
maskcomp[this->cs_[depth+len-i-1]]);
}
}
/**
* Sample a random substring of the given length from this DNA
* string and install the result in 'dst'.
*/
template<typename T>
void randSubstr(
RandomSource& rnd, // pseudo-random generator
T& dst, // put sampled substring here
size_t len, // length of substring to extract
bool watson = true, // true -> possibly extract from Watson strand
bool crick = true) // true -> possibly extract from Crick strand
{
assert(watson || crick);
assert_geq(this->len_, len);
size_t poss = this->len_ - len + 1;
assert_gt(poss, 0);
uint32_t rndoff = (uint32_t)(rnd.nextU32() % poss);
bool fw;
if (watson && !crick) fw = true;
else if(!watson && crick) fw = false;
else {
fw = rnd.nextBool();
}
if(fw) {
// Install Watson substring
for(size_t i = 0; i < len; i++) {
dst[i] = this->cs_[i + rndoff];
}
} else {
// Install Crick substring
for(size_t i = 0; i < len; i++) {
dst[i] = maskcomp[(int)this->cs_[i + rndoff + (len - i - 1)]];
}
}
}
/**
* Put a terminator in the 'len_'th element and then return a
* pointer to the buffer. Useful for printing.
*/
virtual const char* toZBuf() const { return this->toZBufXForm(iupacs); }
};
typedef SStringExpandable<char, 1024, 2> BTString;
typedef SDnaStringExpandable<1024, 2> BTDnaString;
typedef SDnaMaskString<32, 2> BTDnaMask;
#endif /* SSTRING_H_ */
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/Velcon-Zheng/bowtie2.git
git@gitee.com:Velcon-Zheng/bowtie2.git
Velcon-Zheng
bowtie2
bowtie2
master

搜索帮助

0d507c66 1850385 C8b1a773 1850385