@@ -165,6 +165,37 @@ inline uint64_t NTC64(const unsigned char charOut, const unsigned char charIn, c
165165 return (rhVal<fhVal)? rhVal : fhVal;
166166}
167167
168+ // forward-strand ntHash for sliding k-mers to the left
169+ inline uint64_t NTF64L (const uint64_t rhVal, const unsigned k, const unsigned char charOut, const unsigned char charIn) {
170+ uint64_t lBits = seedTab[charIn] >> 33 ;
171+ uint64_t rBits = seedTab[charIn] & 0x1FFFFFFFF ;
172+ uint64_t sIn = (rol31 (lBits,k) << 33 ) | (rol33 (rBits,k));
173+ uint64_t hVal = rhVal ^ sIn ;
174+ hVal ^= seedTab[charOut];
175+ hVal = ror1 (hVal);
176+ hVal = swapbits3263 (hVal);
177+ return hVal;
178+ }
179+
180+ // reverse-complement ntHash for sliding k-mers to the left
181+ inline uint64_t NTR64L (const uint64_t fhVal, const unsigned k, const unsigned char charOut, const unsigned char charIn) {
182+ uint64_t hVal = rol1 (fhVal);
183+ hVal = swapbits033 (hVal);
184+ hVal ^= seedTab[charIn&cpOff];
185+ uint64_t lBits = seedTab[charOut&cpOff] >> 33 ;
186+ uint64_t rBits = seedTab[charOut&cpOff] & 0x1FFFFFFFF ;
187+ uint64_t sOut = (rol31 (lBits,k) << 33 ) | (rol33 (rBits,k));
188+ hVal ^= sOut ;
189+ return hVal;
190+ }
191+
192+ // canonical ntHash for sliding k-mers to the left
193+ inline uint64_t NTC64L (const unsigned char charOut, const unsigned char charIn, const unsigned k, uint64_t & fhVal, uint64_t & rhVal) {
194+ fhVal = NTF64L (fhVal, k, charOut, charIn);
195+ rhVal = NTR64L (rhVal, k, charOut, charIn);
196+ return (rhVal<fhVal)? rhVal : fhVal;
197+ }
198+
168199// ntBase with seeding option
169200inline uint64_t NTF64 (const char * kmerSeq, const unsigned k, const unsigned seed) {
170201 uint64_t hVal=NTF64 (kmerSeq, k);
@@ -353,4 +384,46 @@ inline bool NTMC64(const char *kmerSeq, const unsigned k, const unsigned m, uint
353384 return true ;
354385}
355386
387+ // strand-aware canonical multihash ntHash
388+ inline bool NTMC64 (const char *kmerSeq, const unsigned k, const unsigned m, uint64_t & fhVal, uint64_t & rhVal, unsigned & locN, uint64_t * hVal, bool & hStn) {
389+ fhVal=rhVal=0 ;
390+ uint64_t bVal=0 , tVal=0 ;
391+ locN=0 ;
392+ for (int i=k-1 ; i>=0 ; i--) {
393+ if (seedTab[(unsigned char )kmerSeq[i]]==seedN) {
394+ locN=i;
395+ return false ;
396+ }
397+ fhVal = rol1 (fhVal);
398+ fhVal = swapbits033 (fhVal);
399+ fhVal ^= seedTab[(unsigned char )kmerSeq[k-1 -i]];
400+
401+ rhVal = rol1 (rhVal);
402+ rhVal = swapbits033 (rhVal);
403+ rhVal ^= seedTab[(unsigned char )kmerSeq[i]&cpOff];
404+ }
405+ hStn = rhVal<fhVal;
406+ bVal = hStn? rhVal : fhVal;
407+ hVal[0 ] = bVal;
408+ for (unsigned i=1 ; i<m; i++) {
409+ tVal = bVal * (i ^ k * multiSeed);
410+ tVal ^= tVal >> multiShift;
411+ hVal[i] = tVal;
412+ }
413+ return true ;
414+ }
415+
416+ // starnd-aware canonical multihash ntHash for sliding k-mers
417+ inline void NTMC64 (const unsigned char charOut, const unsigned char charIn, const unsigned k, const unsigned m, uint64_t & fhVal, uint64_t & rhVal, uint64_t *hVal, bool &hStn) {
418+ uint64_t bVal=0 , tVal=0 ;
419+ bVal = NTC64 (charOut, charIn, k, fhVal, rhVal);
420+ hStn = rhVal<fhVal;
421+ hVal[0 ] = bVal;
422+ for (unsigned i=1 ; i<m; i++) {
423+ tVal = bVal * (i ^ k * multiSeed);
424+ tVal ^= tVal >> multiShift;
425+ hVal[i] = tVal;
426+ }
427+ }
428+
356429#endif
0 commit comments