Skip to content

Commit 5240c40

Browse files
Yossi FarjounmagicDGS
andauthored
Add support for SQ-AN in SAMSequenceDictionary #956 (#1474)
* Add support for SQ-AN in SAMSequenceDictionary Co-authored-by: Daniel Gómez-Sánchez <daniel.gomez.sanchez@hotmail.es>
1 parent cd01700 commit 5240c40

6 files changed

Lines changed: 369 additions & 73 deletions

File tree

src/main/java/htsjdk/samtools/SAMSequenceDictionary.java

Lines changed: 37 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -67,19 +67,12 @@ public SAMSequenceRecord getSequence(final String name) {
6767
* Replaces the existing list of SAMSequenceRecords with the given list.
6868
* Reset the aliases
6969
*
70-
* @param list This value is used directly, rather than being copied.
70+
* @param list This value is copied and validated.
7171
*/
7272
public void setSequences(final List<SAMSequenceRecord> list) {
73-
mSequences = list;
73+
mSequences = new ArrayList<>(list.size());
7474
mSequenceMap.clear();
75-
int index = 0;
76-
for (final SAMSequenceRecord record : list) {
77-
record.setSequenceIndex(index++);
78-
if (mSequenceMap.put(record.getSequenceName(), record) != null) {
79-
throw new IllegalArgumentException("Cannot add sequence that already exists in SAMSequenceDictionary: " +
80-
record.getSequenceName());
81-
}
82-
}
75+
list.forEach(this::addSequence);
8376
}
8477

8578
public void addSequence(final SAMSequenceRecord sequenceRecord) {
@@ -90,6 +83,7 @@ public void addSequence(final SAMSequenceRecord sequenceRecord) {
9083
sequenceRecord.setSequenceIndex(mSequences.size());
9184
mSequences.add(sequenceRecord);
9285
mSequenceMap.put(sequenceRecord.getSequenceName(), sequenceRecord);
86+
sequenceRecord.getAlternativeSequenceNames().forEach(an -> addSequenceAlias(sequenceRecord.getSequenceName(), an));
9387
}
9488

9589
/**
@@ -194,7 +188,11 @@ public boolean isSameDictionary(final SAMSequenceDictionary that) {
194188
return !thatSequences.hasNext();
195189
}
196190

197-
/** returns true if the two dictionaries are the same, aliases are NOT considered */
191+
/**
192+
* Returns {@code true} if the two dictionaries are the same.
193+
*
194+
* <p>NOTE: Aliases are NOT considered, but alternative sequence names (AN tag) ARE.
195+
*/
198196
@Override
199197
public boolean equals(Object o) {
200198
if (this == o) return true;
@@ -211,10 +209,11 @@ public boolean equals(Object o) {
211209
* <code>1,chr1,chr01,01,CM000663,NC_000001.10</code> e.g:
212210
* <code>MT,chrM</code>
213211
*
214-
* @param originalName
215-
* existing contig name
216-
* @param altName
217-
* new contig name
212+
* <p>NOTE: this method does not add the alias to the alternative sequence name tag (AN) in the SAMSequenceRecord.
213+
* If you would like to add it to the AN tag, use {@link #addAlternativeSequenceName(String, String)} instead.
214+
*
215+
* @param originalName existing contig name
216+
* @param altName new contig name
218217
* @return the contig associated to the 'originalName/altName'
219218
*/
220219
public SAMSequenceRecord addSequenceAlias(final String originalName,
@@ -230,13 +229,32 @@ public SAMSequenceRecord addSequenceAlias(final String originalName,
230229
// alias was already set to the same record
231230
if (altSeqRecord.equals(originalSeqRecord)) return originalSeqRecord;
232231
// alias was already set to another record
233-
throw new IllegalArgumentException("Alias " + altName +
232+
throw new IllegalArgumentException("Alias " + altName + " for " + originalSeqRecord +
234233
" was already set to " + altSeqRecord.getSequenceName());
235234
}
236235
mSequenceMap.put(altName, originalSeqRecord);
237236
return originalSeqRecord;
238237
}
239238

239+
/**
240+
* Add an alternative sequence name (AN tag) to a SAMSequenceRecord, including it in the aliases
241+
* to retrieve the contigs (as with {@link #addSequenceAlias(String, String)}).
242+
*
243+
* <p>This can be used to provide some alternate names for a given contig, e.g.:
244+
* <code>1,chr1,chr01,01,CM000663</code> or
245+
* <code>MT,chrM</code>.
246+
*
247+
* @param originalName existing contig name
248+
* @param altName new contig name
249+
* @return the contig associated to the 'originalName/altName', with the AN tag including the altName
250+
*/
251+
public SAMSequenceRecord addAlternativeSequenceName(final String originalName,
252+
final String altName) {
253+
final SAMSequenceRecord record = addSequenceAlias(originalName, altName);
254+
record.addAlternativeSequenceName(altName);
255+
return record;
256+
}
257+
240258
/**
241259
* return an MD5 sum for this dictionary, the checksum is re-computed each
242260
* time this method is called.
@@ -306,7 +324,7 @@ public String toString() {
306324
* @param tagsToMatch list of tags that must be equal if present in both sequences. Must contain MD, and LN
307325
* @return dictionary consisting of the same sequences as the two inputs with the merged values of tags.
308326
*/
309-
static public SAMSequenceDictionary mergeDictionaries(final SAMSequenceDictionary dict1,
327+
public static SAMSequenceDictionary mergeDictionaries(final SAMSequenceDictionary dict1,
310328
final SAMSequenceDictionary dict2,
311329
final List<String> tagsToMatch) {
312330

@@ -321,8 +339,8 @@ static public SAMSequenceDictionary mergeDictionaries(final SAMSequenceDictionar
321339

322340
throw new IllegalArgumentException(String.format("Do not use this function to merge dictionaries with " +
323341
"different sequences in them. Sequences must be in the same order as well. Found [%s] and [%s].",
324-
String.join(", ", dict1.getSequences().stream().map(SAMSequenceRecord::getSequenceName).collect(toList())),
325-
String.join(", ", dict2.getSequences().stream().map(SAMSequenceRecord::getSequenceName).collect(toList()))));
342+
dict1.getSequences().stream().map(SAMSequenceRecord::getSequenceName).collect(Collectors.joining(", ")),
343+
dict2.getSequences().stream().map(SAMSequenceRecord::getSequenceName).collect(Collectors.joining(", "))));
326344
}
327345

328346
final SAMSequenceDictionary finalDict = new SAMSequenceDictionary();

0 commit comments

Comments
 (0)