|
38 | 38 |
|
39 | 39 | \newcommand*{\memlimited}{\textcolor{gray}{\footnotesize\it limited}} |
40 | 40 |
|
| 41 | +% Use as, e.g., \cigarops{MID} to produce M/I/D: each token of the argument is set in \tt and the tokens are joined by `/'. \cigaropsAux peels off the first token (#1), prints it, and recurses on the remainder (#2) until \detokenize{#2} is empty. `*' is the end-of-list delimiter, so the argument must be non-empty and must not itself contain `*'. |
| 42 | +\newcommand*{\cigarops}[1]{\cigaropsAux#1*} |
| 43 | +\def\cigaropsAux#1#2*{{\tt #1}\if\relax\detokenize{#2}\relax\else/\cigaropsAux#2*\fi} |
| 44 | + |
41 | 45 | \begin{document} |
42 | 46 |
|
43 | 47 | \input{SAMv1.ver} |
@@ -423,7 +427,7 @@ \subsection{The alignment section: mandatory fields}\label{sec:alnrecord} |
423 | 427 | 3 & {\sf RNAME} & String & {\tt \verb"\*"|\rnameRegexp} & Reference sequence NAME\footnotemark \\ |
424 | 428 | 4 & {\sf POS} & Int & $[0,\,2^{31}-1]$ & 1-based leftmost mapping POSition \\ |
425 | 429 | 5 & {\sf MAPQ} & Int & $[0,\,2^8-1]$ & MAPping Quality \\ |
426 | | - 6 & {\sf CIGAR} & String & {\tt \char92*|([0-9]+[MIDNSHPX=])+} & CIGAR string \\ |
| 430 | + 6 & {\sf CIGAR} & String & {\tt \char92*|([0-9]+[MIDNSHP=X])+} & CIGAR string \\ |
427 | 431 | 7 & {\sf RNEXT} & String & {\tt \verb"\*"|=|\rnameRegexp} & Reference name of the mate/next read \\ |
428 | 432 | 8 & {\sf PNEXT} & Int & $[0,\,2^{31}-1]$ & Position of the mate/next read \\ |
429 | 433 | 9 & {\sf TLEN} & Int & $[-2^{31}+1,\,2^{31}-1]$ & observed Template LENgth \\ |
@@ -554,7 +558,7 @@ \subsection{The alignment section: mandatory fields}\label{sec:alnrecord} |
554 | 558 | \item For mRNA-to-genome alignment, an {\tt N} operation represents an |
555 | 559 | intron. For other types of alignments, the interpretation of {\tt N} |
556 | 560 | is not defined. |
557 | | - \item Sum of lengths of the {\tt M/I/S/=/X} operations shall equal |
| 561 | + \item Sum of lengths of the \cigarops{MIS=X} operations shall equal |
558 | 562 | the length of {\sf SEQ}. |
559 | 563 | \end{itemize} |
560 | 564 | \item {\sf RNEXT}: Reference sequence name of the primary alignment of the NEXT read in the |
@@ -638,7 +642,7 @@ \subsection{The alignment section: mandatory fields}\label{sec:alnrecord} |
638 | 642 |
|
639 | 643 | \item {\sf SEQ}: segment SEQuence. This field can be a `*' when the |
640 | 644 | sequence is not stored. If not a `*', the length of the sequence must |
641 | | - equal the sum of lengths of {\tt M/I/S/=/X} operations in {\sf CIGAR}. |
| 645 | + equal the sum of lengths of \cigarops{MIS=X} operations in {\sf CIGAR}. |
642 | 646 | An `=' denotes that the base is identical to the reference base. No |
643 | 647 | assumptions can be made about the letter case. |
644 | 648 | \item {\sf QUAL}: ASCII of base QUALity plus 33 (same as the quality |
@@ -725,7 +729,7 @@ \section{Recommended Practice for the SAM Format} |
725 | 729 | identical to its mate. |
726 | 730 | \item If all segments in a template are unmapped, their {\sf RNAME} |
727 | 731 | should be set to `*' and {\sf POS} to 0. |
728 | | - \item If {\sf POS} plus the sum of lengths of {\tt M/=/X/D/N} |
| 732 | + \item If {\sf POS} plus the sum of lengths of \cigarops{M=XDN} |
729 | 733 | operations in {\sf CIGAR} exceeds the length specified in the {\tt |
730 | 734 | LN} field of the {\tt @SQ} header line (if it exists) with an SN |
731 | 735 | equal to {\sf RNAME}, the alignment should be unmapped, unless the |
@@ -757,7 +761,7 @@ \section{Recommended Practice for the SAM Format} |
757 | 761 | Mappings that cross the coordinate `join' in circular reference sequences (i.e., those whose {\tt @SQ} headers specify {\tt TP:circular}) may be represented as follows: |
758 | 762 | \begin{enumerate}[label=\arabic*] |
759 | 763 | \item (Preferred) |
760 | | -As usual {\sf POS} should be between 1 and the {\tt @SQ} header's {\tt LN} value, but {\sf POS} plus the sum of the lengths of {\tt M/=/X/D/N} {\sf CIGAR} operations may exceed {\tt LN}. |
| 764 | +As usual {\sf POS} should be between 1 and the {\tt @SQ} header's {\tt LN} value, but {\sf POS} plus the sum of the lengths of \cigarops{M=XDN} {\sf CIGAR} operations may exceed {\tt LN}. |
761 | 765 | Coordinates greater than~{\tt LN} are interpreted by subtracting {\tt LN} so that bases at $\texttt{LN}+1, \texttt{LN}+2, \texttt{LN}+3, \ldots$ are considered to be mapped at positions $1,2,3,\ldots$; thus each (1-based) position $p$ is interpreted as $((p-1)\bmod\texttt{LN})+1$.% |
762 | 766 | \footnote{The impact of this representation on indexing and random access is yet to be explored by implementations.} |
763 | 767 |
|
@@ -1063,7 +1067,7 @@ \subsection{The BAM format} |
1063 | 1067 | & \multicolumn{2}{l|}{\sf next\_pos} & 0-based leftmost pos of the next segment ($=\underline{\sf PNEXT}-1$) & {\tt int32\_t} & [-1] \\\cline{2-6} |
1064 | 1068 | & \multicolumn{2}{l|}{\sf tlen} & Template length ($=\underline{\sf TLEN}$) & {\tt int32\_t} & [0] \\\cline{2-6} |
1065 | 1069 | & \multicolumn{2}{l|}{\sf read\_name} & Read name, {\tt NUL}-terminated (\underline{\sf QNAME} with trailing `{\tt\verb"\0"}')\footnotemark & {\tt char[{\sf l\_read\_name}]} & \\\cline{2-6} |
1066 | | - & \multicolumn{2}{l|}{\sf cigar} & CIGAR: {\tt {\sf op\_len}\char60\char60 4\char124{\sf op}}. `{\tt MIDNSHP\char61X}'$\to$`012345678' & {\tt uint32\_t[{\sf n\_cigar\_op}]} & \\\cline{2-6} |
| 1070 | + & \multicolumn{2}{l|}{\sf cigar} & CIGAR: {\tt {\sf op\_len}\char60\char60 4\char124{\sf op}}. `{\tt MIDNSHP=X}'$\to$`012345678' & {\tt uint32\_t[{\sf n\_cigar\_op}]} & \\\cline{2-6} |
1067 | 1071 | & \multicolumn{2}{l|}{\sf seq} & 4-bit encoded read: `{\tt =ACMGRSVTWYHKDBN}'$\to[0,15]$. See Section~\ref{sec:seq} & {\tt uint8\_t[({\sf l\_seq}+1)/2]} & \\\cline{2-6} |
1068 | 1072 | & \multicolumn{2}{l|}{\sf qual} & Phred-scaled base qualities. See Section~\ref{sec:seq} & {\tt char[{\sf l\_seq}]} & \\\cline{2-6} |
1069 | 1073 | & \multicolumn{5}{c|}{\textcolor{gray}{\it List of auxiliary data (until the end of the alignment block)}} \\\cline{3-6} |
@@ -1513,6 +1517,7 @@ \subsection*{1.3: July 2010 to April 2011} |
1513 | 1517 | \begin{itemize} |
1514 | 1518 | \item Add {\tt RG PG} header field. (Nov 2010) |
1515 | 1519 | \item Add BAM description and index sections. (Nov 2010) |
| 1520 | +\item \textbf{Add `{\tt =}' and `{\tt X}' CIGAR operations.} (July 2010) |
1516 | 1521 | \item \textbf{Removal of FLAG letters.} (July 2010) |
1517 | 1522 | \item The {\tt SM} header field, previously mandatory for {\tt @RG}, is now |
1518 | 1523 | optional. (July 2010) |
|
0 commit comments