Skip to content

Commit b5a1044

Browse files
committed
nthash.hpp: added slide left functions
1 parent fb22431 commit b5a1044

1 file changed

Lines changed: 73 additions & 0 deletions

File tree

nthash.hpp

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,37 @@ inline uint64_t NTC64(const unsigned char charOut, const unsigned char charIn, c
165165
return (rhVal<fhVal)? rhVal : fhVal;
166166
}
167167

168+
// forward-strand ntHash for sliding k-mers to the left
169+
inline uint64_t NTF64L(const uint64_t rhVal, const unsigned k, const unsigned char charOut, const unsigned char charIn) {
170+
uint64_t lBits = seedTab[charIn] >> 33;
171+
uint64_t rBits = seedTab[charIn] & 0x1FFFFFFFF;
172+
uint64_t sIn = (rol31(lBits,k) << 33) | (rol33(rBits,k));
173+
uint64_t hVal = rhVal ^ sIn;
174+
hVal ^= seedTab[charOut];
175+
hVal = ror1(hVal);
176+
hVal = swapbits3263(hVal);
177+
return hVal;
178+
}
179+
180+
// reverse-complement ntHash for sliding k-mers to the left
181+
inline uint64_t NTR64L(const uint64_t fhVal, const unsigned k, const unsigned char charOut, const unsigned char charIn) {
182+
uint64_t hVal = rol1(fhVal);
183+
hVal = swapbits033(hVal);
184+
hVal ^= seedTab[charIn&cpOff];
185+
uint64_t lBits = seedTab[charOut&cpOff] >> 33;
186+
uint64_t rBits = seedTab[charOut&cpOff] & 0x1FFFFFFFF;
187+
uint64_t sOut = (rol31(lBits,k) << 33) | (rol33(rBits,k));
188+
hVal ^= sOut;
189+
return hVal;
190+
}
191+
192+
// canonical ntHash for sliding k-mers to the left
193+
inline uint64_t NTC64L(const unsigned char charOut, const unsigned char charIn, const unsigned k, uint64_t& fhVal, uint64_t& rhVal) {
194+
fhVal = NTF64L(fhVal, k, charOut, charIn);
195+
rhVal = NTR64L(rhVal, k, charOut, charIn);
196+
return (rhVal<fhVal)? rhVal : fhVal;
197+
}
198+
168199
// ntBase with seeding option
169200
inline uint64_t NTF64(const char * kmerSeq, const unsigned k, const unsigned seed) {
170201
uint64_t hVal=NTF64(kmerSeq, k);
@@ -353,4 +384,46 @@ inline bool NTMC64(const char *kmerSeq, const unsigned k, const unsigned m, uint
353384
return true;
354385
}
355386

387+
// strand-aware canonical multihash ntHash
388+
inline bool NTMC64(const char *kmerSeq, const unsigned k, const unsigned m, uint64_t& fhVal, uint64_t& rhVal, unsigned& locN, uint64_t* hVal, bool& hStn) {
389+
fhVal=rhVal=0;
390+
uint64_t bVal=0, tVal=0;
391+
locN=0;
392+
for(int i=k-1; i>=0; i--) {
393+
if(seedTab[(unsigned char)kmerSeq[i]]==seedN) {
394+
locN=i;
395+
return false;
396+
}
397+
fhVal = rol1(fhVal);
398+
fhVal = swapbits033(fhVal);
399+
fhVal ^= seedTab[(unsigned char)kmerSeq[k-1-i]];
400+
401+
rhVal = rol1(rhVal);
402+
rhVal = swapbits033(rhVal);
403+
rhVal ^= seedTab[(unsigned char)kmerSeq[i]&cpOff];
404+
}
405+
hStn = rhVal<fhVal;
406+
bVal = hStn? rhVal : fhVal;
407+
hVal[0] = bVal;
408+
for(unsigned i=1; i<m; i++) {
409+
tVal = bVal * (i ^ k * multiSeed);
410+
tVal ^= tVal >> multiShift;
411+
hVal[i] = tVal;
412+
}
413+
return true;
414+
}
415+
416+
// starnd-aware canonical multihash ntHash for sliding k-mers
417+
inline void NTMC64(const unsigned char charOut, const unsigned char charIn, const unsigned k, const unsigned m, uint64_t& fhVal, uint64_t& rhVal, uint64_t *hVal, bool &hStn) {
418+
uint64_t bVal=0, tVal=0;
419+
bVal = NTC64(charOut, charIn, k, fhVal, rhVal);
420+
hStn = rhVal<fhVal;
421+
hVal[0] = bVal;
422+
for(unsigned i=1; i<m; i++) {
423+
tVal = bVal * (i ^ k * multiSeed);
424+
tVal ^= tVal >> multiShift;
425+
hVal[i] = tVal;
426+
}
427+
}
428+
356429
#endif

0 commit comments

Comments
 (0)