2424package htsjdk .samtools ;
2525
2626
27+ import htsjdk .variant .variantcontext .VariantContext ;
28+
2729import java .math .BigInteger ;
2830import java .net .URI ;
2931import java .net .URISyntaxException ;
@@ -57,23 +59,27 @@ public class SAMSequenceRecord extends AbstractSAMHeaderRecord implements Clonea
5759
5860
5961 /**
60- * This is not a valid sequence name, because it is reserved in the MRNM field of SAM text format
62+ * This is not a valid sequence name, because it is reserved in the RNEXT field of SAM text format
6163 * to mean "same reference as RNAME field."
6264 */
63- public static final String RESERVED_MRNM_SEQUENCE_NAME = "=" ;
65+
66+ public static final String RESERVED_RNEXT_SEQUENCE_NAME = "=" ;
67+
68+ /** @deprecated Use {@link #RESERVED_RNEXT_SEQUENCE_NAME} instead. */
69+ @ Deprecated
70+ public static final String RESERVED_MRNM_SEQUENCE_NAME = RESERVED_RNEXT_SEQUENCE_NAME ;
6471
6572 /**
6673 * The standard tags are stored in text header without type information, because the type of these tags is known.
6774 */
6875 public static final Set <String > STANDARD_TAGS =
69- new HashSet <String >(Arrays .asList (SEQUENCE_NAME_TAG , SEQUENCE_LENGTH_TAG , ASSEMBLY_TAG , MD5_TAG , URI_TAG ,
70- SPECIES_TAG ));
76+ new HashSet <>(Arrays .asList (SEQUENCE_NAME_TAG , SEQUENCE_LENGTH_TAG , ASSEMBLY_TAG , MD5_TAG , URI_TAG , SPECIES_TAG ));
7177
72- // Split on any whitespace
73- private static final Pattern SEQUENCE_NAME_SPLITTER = Pattern .compile ("\\ s" );
7478 // These are the chars matched by \\s.
7579 private static final char [] WHITESPACE_CHARS = {' ' , '\t' , '\n' , '\013' , '\f' , '\r' }; // \013 is vertical tab
7680
81+ private static final Pattern LEGAL_RNAME_PATTERN = Pattern .compile ("[0-9A-Za-z!#$%&+./:;?@^_|~-][0-9A-Za-z!#$%&*+./:;=?@^_|~-]*" );
82+
7783 /**
7884 * @deprecated Use {@link #SAMSequenceRecord(String, int)} instead.
7985 * sequenceLength is required for the object to be considered valid.
@@ -85,9 +91,6 @@ public SAMSequenceRecord(final String name) {
8591
8692 public SAMSequenceRecord (final String name , final int sequenceLength ) {
8793 if (name != null ) {
88- if (SEQUENCE_NAME_SPLITTER .matcher (name ).find ()) {
89- throw new SAMException ("Sequence name contains invalid character: " + name );
90- }
9194 validateSequenceName (name );
9295 mSequenceName = name .intern ();
9396 } else {
@@ -188,8 +191,8 @@ public final SAMSequenceRecord clone() {
188191 public static String truncateSequenceName (final String sequenceName ) {
189192 /*
190193 * Instead of using regex split, do it manually for better performance.
191- return SEQUENCE_NAME_SPLITTER.split(sequenceName, 2)[0];
192- */
194+ */
195+
193196 int truncateAt = sequenceName .length ();
194197 for (final char c : WHITESPACE_CHARS ) {
195198 int index = sequenceName .indexOf (c );
@@ -204,8 +207,8 @@ public static String truncateSequenceName(final String sequenceName) {
204207 * Throw an exception if the sequence name is not valid.
205208 */
206209 public static void validateSequenceName (final String name ) {
207- if (RESERVED_MRNM_SEQUENCE_NAME . equals (name )) {
208- throw new SAMException ("'" + RESERVED_MRNM_SEQUENCE_NAME + "' is not a valid sequence name" );
210+ if (! LEGAL_RNAME_PATTERN . matcher (name ). useAnchoringBounds ( true ). matches ( )) {
211+ throw new SAMException (String . format ( "Sequence name '%s' doesn't match regex: '%s' " , name , LEGAL_RNAME_PATTERN ) );
209212 }
210213 }
211214
0 commit comments