@@ -76,6 +76,7 @@ Columns:
7676 16. Q30(%) percentage of bases with the quality score greater than 30
7777 17. AvgQual average quality
7878 18. GC(%) percentage of GC content
79+ 19. sum_n number of ambiguous letters (N, n, X, x)
7980
8081Attention:
8182 1. Sequence length metrics (sum_len, min_len, avg_len, max_len, Q1, Q2, Q3)
@@ -109,6 +110,7 @@ Tips:
109110 }
110111 gapLettersBytes := []byte (gapLetters )
111112 gcLettersBytes := []byte {'g' , 'c' , 'G' , 'C' }
113+ nLettersBytes := []byte {'X' , 'x' , 'N' , 'n' }
112114
113115 skipFileCheck := getFlagBool (cmd , "skip-file-check" )
114116 all := getFlagBool (cmd , "all" )
@@ -194,7 +196,7 @@ Tips:
194196 "max_len" ,
195197 }
196198 if all {
197- colnames = append (colnames , []string {"Q1" , "Q2" , "Q3" , "sum_gap" , "N50" , "N50_num" , "Q20(%)" , "Q30(%)" , "AvgQual" , "GC(%)" }... )
199+ colnames = append (colnames , []string {"Q1" , "Q2" , "Q3" , "sum_gap" , "N50" , "N50_num" , "Q20(%)" , "Q30(%)" , "AvgQual" , "GC(%)" , "sum_n" }... )
198200 }
199201
200202 if hasNX {
@@ -242,7 +244,7 @@ Tips:
242244 info .lenAvg ,
243245 info .lenMax )
244246 if all {
245- fmt .Fprintf (outfh , "\t %.1f\t %.1f\t %.1f\t %d\t %d\t %d\t %.2f\t %.2f\t %.2f\t %.2f" ,
247+ fmt .Fprintf (outfh , "\t %.1f\t %.1f\t %.1f\t %d\t %d\t %d\t %.2f\t %.2f\t %.2f\t %.2f\t %d " ,
246248 info .Q1 ,
247249 info .Q2 ,
248250 info .Q3 ,
@@ -252,7 +254,9 @@ Tips:
252254 info .q20 ,
253255 info .q30 ,
254256 info .avgQual ,
255- info .gc )
257+ info .gc ,
258+ info .nSum ,
259+ )
256260 }
257261 if hasNX {
258262 for _ , x = range info .nx {
@@ -283,7 +287,7 @@ Tips:
283287 info .lenAvg ,
284288 info .lenMax )
285289 if all {
286- fmt .Fprintf (outfh , "\t %.1f\t %.1f\t %.1f\t %d\t %d\t %d\t %.2f\t %.2f\t %.2f\t %.2f" ,
290+ fmt .Fprintf (outfh , "\t %.1f\t %.1f\t %.1f\t %d\t %d\t %d\t %.2f\t %.2f\t %.2f\t %.2f\t %d " ,
287291 info .Q1 ,
288292 info .Q2 ,
289293 info .Q3 ,
@@ -293,7 +297,9 @@ Tips:
293297 info .q20 ,
294298 info .q30 ,
295299 info .avgQual ,
296- info .gc )
300+ info .gc ,
301+ info .nSum ,
302+ )
297303 }
298304 if hasNX {
299305 for _ , x = range info .nx {
@@ -332,7 +338,7 @@ Tips:
332338 info .lenAvg ,
333339 info .lenMax )
334340 if all {
335- fmt .Fprintf (outfh , "\t %.1f\t %.1f\t %.1f\t %d\t %d\t %d\t %.2f\t %.2f\t %.2f\t %.2f" ,
341+ fmt .Fprintf (outfh , "\t %.1f\t %.1f\t %.1f\t %d\t %d\t %d\t %.2f\t %.2f\t %.2f\t %.2f\t %d " ,
336342 info .Q1 ,
337343 info .Q2 ,
338344 info .Q3 ,
@@ -342,7 +348,9 @@ Tips:
342348 info .q20 ,
343349 info .q30 ,
344350 info .avgQual ,
345- info .gc )
351+ info .gc ,
352+ info .nSum ,
353+ )
346354 }
347355 if hasNX {
348356 for _ , x = range info .nx {
@@ -400,6 +408,7 @@ Tips:
400408
401409 var gapSum uint64
402410 var gcSum uint64
411+ var nSum uint64
403412
404413 lensStats := util .NewLengthStats ()
405414
@@ -478,6 +487,7 @@ Tips:
478487
479488 gapSum += uint64 (byteutil .CountBytes (record .Seq .Seq , gapLettersBytes ))
480489 gcSum += uint64 (byteutil .CountBytes (record .Seq .Seq , gcLettersBytes ))
490+ nSum += uint64 (byteutil .CountBytes (record .Seq .Seq , nLettersBytes ))
481491 }
482492 }
483493
@@ -528,7 +538,7 @@ Tips:
528538 file = stdinLabel
529539 }
530540 ch <- statInfo {file , seqFormat , t ,
531- 0 , 0 , 0 , 0 ,
541+ 0 , 0 , 0 , 0 , 0 ,
532542 0 , 0 , 0 , 0 ,
533543 0 , 0 , 0 ,
534544 0 , 0 , 0 , 0 ,
@@ -542,7 +552,7 @@ Tips:
542552 file = stdinLabel
543553 }
544554 ch <- statInfo {file , seqFormat , t ,
545- lensStats .Count (), lensStats .Sum (), gapSum , lensStats .Min (),
555+ lensStats .Count (), lensStats .Sum (), gapSum , lensStats .Min (), nSum ,
546556 mathutil .Round (lensStats .Mean (), 1 ), lensStats .Max (), n50 , l50 ,
547557 q1 , q2 , q3 ,
548558 mathutil .Round (float64 (q20 )/ float64 (lensStats .Sum ())* 100 , 2 ),
@@ -601,6 +611,7 @@ Tips:
601611 {Header : "Q30(%)" , Align : stable .AlignRight , HumanizeNumbers : true },
602612 {Header : "AvgQual" , Align : stable .AlignRight , HumanizeNumbers : true },
603613 {Header : "GC(%)" , Align : stable .AlignRight , HumanizeNumbers : true },
614+ {Header : "sum_n" , Align : stable .AlignRight , HumanizeNumbers : true },
604615 // {Header: "L50", AlignRight: true},
605616 }... )
606617 }
@@ -634,6 +645,7 @@ Tips:
634645 row = append (row , info .q30 )
635646 row = append (row , info .avgQual )
636647 row = append (row , info .gc )
648+ row = append (row , info .nSum )
637649 }
638650 if hasNX {
639651 for _ , x = range info .nx {
@@ -656,6 +668,7 @@ type statInfo struct {
656668 lenSum uint64
657669 gapSum uint64
658670 lenMin uint64
671+ nSum uint64
659672
660673 lenAvg float64
661674 lenMax uint64
0 commit comments