Skip to content

Commit acf09ff

Browse files
author
Rob Patro
committed
Merge branch 'develop'
2 parents 82898c0 + 98e4cd5 commit acf09ff

4 files changed

Lines changed: 131 additions & 12 deletions

File tree

include/RapMapUtils.hpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
#include "chobo/small_vector.hpp"
3737
#include "RapMapConfig.hpp"
3838
#include "nonstd/optional.hpp"
39+
#include "FastxParser.hpp"
3940

4041
#ifdef RAPMAP_SALMON_SUPPORT
4142
#include "LibraryFormat.hpp"
@@ -821,6 +822,10 @@ namespace rapmap {
821822

822823
std::string reverseComplement(std::string& seq);
823824

825+
826+
uint32_t writeUnalignedSingleToStream(fastx_parser::ReadSeq& r, fmt::MemoryWriter& sstream);
827+
uint32_t writeUnalignedPairToStream(fastx_parser::ReadPair& r, fmt::MemoryWriter& sstream);
828+
824829
template <typename ReadPairT, typename IndexT>
825830
uint32_t writeAlignmentsToStream(
826831
ReadPairT& r,
@@ -1262,7 +1267,7 @@ namespace rapmap {
12621267
template <typename Archive>
12631268
void save(Archive& archive, const my_mer& mer);
12641269
1265-
template <typename Archive>
1270+
teplate <typename Archive>
12661271
void load(Archive& archive, my_mer& mer);
12671272
*/
12681273
}

src/FastxParser.cpp

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ int parseReads(
158158
while (!seqContainerQueue_.try_dequeue(*cCont, local)) {
159159
fastx_parser::thread_utils::backoffOrYield(curMaxDelay);
160160
// Think of a way to do this that wouldn't be loud (or would allow a user-definable logging mechanism)
161-
// std::cerr << "couldn't dequeue read chunk\n";
161+
//std::cerr << "couldn't dequeue read chunk\n";
162162
}
163163
size_t numObtained{local->size()};
164164
// open the file and init the parser
@@ -196,7 +196,10 @@ int parseReads(
196196
if (ksv == -3) {
197197
--numParsing;
198198
return -3;
199-
}
199+
} else if (ksv < -1) {
200+
--numParsing;
201+
return ksv;
202+
}
200203

201204
// If we hit the end of the file and have any reads in our local buffer
202205
// then dump them here.
@@ -207,7 +210,13 @@ int parseReads(
207210
fastx_parser::thread_utils::backoffOrYield(curMaxDelay);
208211
}
209212
numWaiting = 0;
213+
} else if (numObtained > 0){
214+
curMaxDelay = MIN_BACKOFF_ITERS;
215+
while (!seqContainerQueue_.try_enqueue(std::move(local))) {
216+
fastx_parser::thread_utils::backoffOrYield(curMaxDelay);
217+
}
210218
}
219+
211220
// destroy the parser and close the file
212221
kseq_destroy(seq);
213222
gzclose(fp);
@@ -300,7 +309,13 @@ int parseReadPair(
300309
fastx_parser::thread_utils::backoffOrYield(curMaxDelay);
301310
}
302311
numWaiting = 0;
312+
} else if (numObtained > 0){
313+
curMaxDelay = MIN_BACKOFF_ITERS;
314+
while (!seqContainerQueue_.try_enqueue(std::move(local))) {
315+
fastx_parser::thread_utils::backoffOrYield(curMaxDelay);
316+
}
303317
}
318+
304319
// destroy the parser and close the file
305320
kseq_destroy(seq);
306321
gzclose(fp);

src/RapMapSAMapper.cpp

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,7 @@ struct MappingOpts {
148148
bool noOrphans{false};
149149
bool noDovetail{false};
150150
bool recoverOrphans{false};
151+
bool writeUnmapped{false};
151152
};
152153

153154
using AlnCacheMap = selective_alignment::utils::AlnCacheMap;
@@ -315,15 +316,13 @@ void processReadsSingleSA(single_parser * parser,
315316
}
316317
}
317318

318-
if (hits.size() > 0 and !mopts->noOutput) {
319-
/*
320-
std::sort(hits.begin(), hits.end(),
321-
[](const QuasiAlignment& a, const QuasiAlignment& b) -> bool {
322-
return a.tid < b.tid;
323-
});
324-
*/
319+
if (!mopts->noOutput){
320+
if (hits.size() > 0) {
325321
rapmap::utils::writeAlignmentsToStream(read, formatter,
326322
hctr, hits, sstream);
323+
} else {
324+
rapmap::utils::writeUnalignedSingleToStream(read, sstream);
325+
}
327326
}
328327

329328
if (hctr.numReads > hctr.lastPrint + 1000000) {
@@ -702,9 +701,13 @@ void processReadsPairSA(paired_parser* parser,
702701
hctr.totHits += jointHits.size();
703702

704703
// If we have reads to output, and we're writing output.
705-
if (jointHits.size() > 0 and !mopts->noOutput and jointHits.size() <= mopts->maxNumHits) {
704+
if (!mopts->noOutput) {
705+
if (jointHits.size() > 0 and jointHits.size() <= mopts->maxNumHits) {
706706
rapmap::utils::writeAlignmentsToStream(rpair, formatter,
707707
hctr, jointHits, sstream);
708+
} else {
709+
rapmap::utils::writeUnalignedPairToStream(rpair, sstream);
710+
}
708711
}
709712

710713
if (hctr.numReads > hctr.lastPrint + 1000000) {
@@ -1001,10 +1004,11 @@ int rapMapSAMap(int argc, char* argv[]) {
10011004
TCLAP::SwitchArg chain("c", "chaining", "Score the hits to find the best chain", false);
10021005
TCLAP::SwitchArg compressedOutput("x", "compressed", "Compress the output SAM file using zlib", false);
10031006
TCLAP::SwitchArg quiet("q", "quiet", "Disable all console output apart from warnings and errors", false);
1007+
TCLAP::SwitchArg writeUnmapped("u", "writeUnmapped", "include unmapped reads in the output SAM records", false);
1008+
10041009
TCLAP::SwitchArg recoverOrphans("", "recoverOrphans", "perform orphan recovery to try and recover endpoints of orphaned reads", false);
10051010
TCLAP::SwitchArg noDovetail("", "noDovetail", "discard dovetailing mappings", false);
10061011
TCLAP::SwitchArg noOrphans("", "noOrphans", "discard orphaned mappings", false);
1007-
10081012
TCLAP::SwitchArg selAln("s", "selAln", "Perform selective alignment to validate mapping hits", false);
10091013
TCLAP::ValueArg<int16_t> gapOpenPen("", "go", "[only with selAln]: gap open penalty", false, 4, "positive integer");
10101014
TCLAP::ValueArg<int16_t> gapExtendPen("", "ge", "[only with selAln]: gap extend penalty", false, 2, "positive integer");
@@ -1026,6 +1030,7 @@ int rapMapSAMap(int argc, char* argv[]) {
10261030
cmd.add(fuzzy);
10271031
cmd.add(chain);
10281032
cmd.add(quiet);
1033+
cmd.add(writeUnmapped);
10291034
cmd.add(hardFilter);
10301035
cmd.add(recoverOrphans);
10311036
cmd.add(noDovetail);
@@ -1099,6 +1104,7 @@ int rapMapSAMap(int argc, char* argv[]) {
10991104
} else {
11001105
mopts.unmatedReads = unmatedReads.getValue();
11011106
}
1107+
mopts.writeUnmapped = writeUnmapped.getValue();
11021108
mopts.numThreads = numThreads.getValue();
11031109
mopts.maxNumHits = maxNumHits.getValue();
11041110
mopts.outname = (outname.isSet()) ? outname.getValue() : "";

src/RapMapUtils.cpp

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,99 @@ namespace rapmap {
133133
return work;
134134
}
135135

136+
137+
uint32_t writeUnalignedPairToStream(fastx_parser::ReadPair& r,
138+
fmt::MemoryWriter& sstream) {
139+
constexpr uint16_t flags1 = 0x1 | 0x4 | 0x8 | 0x40;
140+
constexpr uint16_t flags2 = 0x1 | 0x4 | 0x8 | 0x80;
141+
142+
auto processReadName = [](const std::string& name) -> fmt::StringRef {
143+
nonstd::string_view readNameView(name);
144+
// If the read name contains multiple space-separated parts,
145+
// print only the first
146+
size_t splitPos = readNameView.find(' ');
147+
if (splitPos < readNameView.length()) {
148+
readNameView.remove_suffix(readNameView.length() - splitPos);
149+
} else {
150+
splitPos = readNameView.length();
151+
}
152+
153+
// trim /1 from the pe read
154+
if (splitPos > 2 and readNameView[splitPos - 2] == '/') {
155+
readNameView.remove_suffix(2);
156+
//readName[splitPos - 2] = '\0';
157+
}
158+
return fmt::StringRef(readNameView.data(), readNameView.size());
159+
};
160+
161+
auto readNameView = processReadName(r.first.name);
162+
auto mateNameView = processReadName(r.second.name);
163+
std::string* readSeq1 = &(r.first.seq);
164+
std::string* readSeq2 = &(r.second.seq);
165+
166+
sstream << readNameView << '\t' // QNAME
167+
<< flags1 << '\t' // FLAGS
168+
<< "*\t" // RNAME
169+
<< "0\t" // POS (1-based)
170+
<< "255\t" // MAPQ
171+
<< "*\t" // CIGAR
172+
<< "*\t" // RNEXT
173+
<< "*\t" // PNEXT
174+
<< "0\t" // TLEN
175+
<< *readSeq1 << '\t' // SEQ
176+
<< "*\t" // QUAL
177+
<< "NH:i:0\t"
178+
<< "HI:i:0\t"
179+
<< "AS:i:0\n";
180+
181+
sstream << mateNameView << '\t' // QNAME
182+
<< flags2 << '\t' // FLAGS
183+
<< "*\t" // RNAME
184+
<< "0\t" // POS (1-based)
185+
<< "255\t" // MAPQ
186+
<< "*\t" // CIGAR
187+
<< "*\t" // RNEXT
188+
<< "*\t" // PNEXT
189+
<< "0\t" // TLEN
190+
<< *readSeq2 << '\t' // SEQ
191+
<< "*\t" // QUAL
192+
<< "NH:i:0\t"
193+
<< "HI:i:0\t"
194+
<< "AS:i:0\n";
195+
return 0;
196+
}
197+
198+
uint32_t writeUnalignedSingleToStream(fastx_parser::ReadSeq& r,
199+
fmt::MemoryWriter& sstream) {
200+
constexpr uint16_t flags = 0x4;
201+
202+
nonstd::string_view readNameViewSV(r.name);
203+
// If the read name contains multiple space-separated parts, print
204+
// only the first
205+
size_t splitPos = readNameViewSV.find(' ');
206+
if (splitPos < readNameViewSV.length()) {
207+
readNameViewSV.remove_suffix(readNameViewSV.size() - splitPos);
208+
}
209+
fmt::StringRef readNameView(readNameViewSV.data(), readNameViewSV.size());
210+
std::string* readSeq = &(r.seq);
211+
212+
sstream << readNameView << '\t' // QNAME
213+
<< flags << '\t' // FLAGS
214+
<< "*\t" // RNAME
215+
<< 0 << '\t' // POS (1-based)
216+
<< 255 << '\t' // MAPQ
217+
<< "*\t" // CIGAR
218+
<< "*\t" // MATE NAME
219+
<< "0\t" // MATE POS
220+
<< "0\t" // TLEN
221+
<< *readSeq << '\t' // SEQ
222+
<< "*\t" // QSTR
223+
<< "NH:i:0\t"
224+
<< "HI:i:0\t"
225+
<< "AS:i:0\n";
226+
return 0;
227+
}
228+
136229
template <typename ReadT, typename IndexT>
137230
uint32_t writeAlignmentsToStream(
138231
ReadT& r,

0 commit comments

Comments
 (0)