diff --git a/readfq.py b/readfq.py index 1fdcbf6..a205ed3 100644 --- a/readfq.py +++ b/readfq.py @@ -7,14 +7,15 @@ def readfq(fp): # this is a generator function last = l[:-1] # save this line break if not last: break - name, seqs, last = last[1:].partition(" ")[0], [], None + name, _, comment = last[1:].partition(" ") + seqs, last = [], None for l in fp: # read the sequence if l[0] in '@+>': last = l[:-1] break seqs.append(l[:-1]) if not last or last[0] != '+': # this is a fasta record - yield name, ''.join(seqs), None # yield a fasta record + yield name, comment, ''.join(seqs), None # yield a fasta record if not last: break else: # this is a fastq record seq, leng, seqs = ''.join(seqs), 0, [] @@ -23,16 +24,16 @@ def readfq(fp): # this is a generator function leng += len(l) - 1 if leng >= len(seq): # have read enough quality last = None - yield name, seq, ''.join(seqs); # yield a fastq record + yield name, comment, seq, ''.join(seqs); # yield a fastq record break if last: # reach EOF before reading enough quality - yield name, seq, None # yield a fasta record instead + yield name, comment, seq, None # yield a fasta record instead break if __name__ == "__main__": import sys n, slen, qlen = 0, 0, 0 - for name, seq, qual in readfq(sys.stdin): + for name, comment, seq, qual in readfq(sys.stdin): n += 1 slen += len(seq) qlen += qual and len(qual) or 0