Skip to content
This repository was archived by the owner on May 22, 2026. It is now read-only.

Commit db38bfe

Browse files
committed
Added validation of reference allele.
1 parent 90d8ee1 commit db38bfe

4 files changed

Lines changed: 45 additions & 8 deletions

File tree

VarDict

src/main/java/com/astrazeneca/vardict/modules/ToVarsBuilder.java

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
import java.util.regex.Pattern;
1616
import java.util.regex.Matcher;
1717
import java.util.*;
18+
import java.util.stream.Collectors;
19+
import java.util.stream.Stream;
1820

1921
import static com.astrazeneca.vardict.data.scopedata.GlobalReadOnlyScope.instance;
2022
import static com.astrazeneca.vardict.Utils.*;
@@ -35,6 +37,21 @@ public class ToVarsBuilder implements Module<RealignedVariationData, AlignedVars
3537
private Map<Integer, Character> ref;
3638
private Double duprate;
3739

40+
// Map of IUPAC ambiguity codes that we can observe in reference.
41+
// By VCF 4.3 specification they aren't allowed and must be reduced to a first alphabetically base.
42+
private Map<String, String> IUPAC_AMBIGUITY_CODES = Stream.of(new String[][] {
43+
{"M","A"},
44+
{"R","A"},
45+
{"W","A"},
46+
{"S","C"},
47+
{"Y","C"},
48+
{"K","G"},
49+
{"V","A"},
50+
{"H","A"},
51+
{"D","A"},
52+
{"B","C"},
53+
}).collect(Collectors.toMap(key -> key[0], key -> key[1]));
54+
3855
public Map<Integer, VariationMap<String, Variation>> getInsertionVariants() {
3956
return insertionVariants;
4057
}
@@ -903,7 +920,7 @@ else if (deletionLength < instance().conf.SVMINLEN) {
903920
vref.shift3 = shift3;
904921
vref.startPosition = startPosition;
905922
vref.endPosition = endPosition;
906-
vref.refallele = refallele;
923+
vref.refallele = validateRefallele(refallele);
907924
vref.varallele = varallele;
908925
vref.genotype = genotype;
909926
vref.totalPosCoverage = totalPosCoverage;
@@ -954,8 +971,8 @@ else if (deletionLength < instance().conf.SVMINLEN) {
954971
vref.highQualityReadsFrequency = roundHalfEven("0.0000", vref.highQualityReadsFrequency);
955972
String referenceBase = ref.containsKey(position) ? ref.get(position).toString() : ""; // $r
956973
//both refallele and varallele are 1 base from reference string
957-
vref.refallele = referenceBase;
958-
vref.varallele = referenceBase;
974+
vref.refallele = validateRefallele(referenceBase);
975+
vref.varallele = validateRefallele(referenceBase);
959976
vref.genotype = referenceBase + "/" + referenceBase;
960977
vref.leftseq = "";
961978
vref.rightseq = "";
@@ -976,6 +993,13 @@ else if (deletionLength < instance().conf.SVMINLEN) {
976993
}
977994
}
978995

996+
String validateRefallele(String refallele) {
997+
if (IUPAC_AMBIGUITY_CODES.containsKey(refallele)){
998+
refallele = IUPAC_AMBIGUITY_CODES.get(refallele);
999+
}
1000+
return refallele;
1001+
}
1002+
9791003
/**
9801004
* Microsatellite instability
9811005
* Tandemly repeated short sequence motifs ranging from 1– 6(8 in our case) base pairs are called microsatellites.

src/main/java/com/astrazeneca/vardict/postprocessmodules/SimplePostProcessModule.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,11 @@ public void accept(Scope<AlignedVarsData> mapScope) {
6565
if (vref.refallele.contains("N")) {
6666
continue;
6767
}
68+
if (vref.refallele.equals(vref.varallele)) {
69+
if (!conf.doPileup) {
70+
continue;
71+
}
72+
}
6873
vref.vartype = vref.varType();
6974
if (!vref.isGoodVar(variantsOnPosition.referenceVariant, vref.vartype, mapScope.splice)) {
7075
if (!conf.doPileup) {

src/test/java/com/astrazeneca/vardict/modules/ToVarsBuilderTest.java

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,7 @@
1111
import org.testng.annotations.BeforeMethod;
1212
import org.testng.annotations.Test;
1313

14-
import java.util.ArrayList;
15-
import java.util.HashMap;
16-
import java.util.List;
17-
import java.util.Map;
14+
import java.util.*;
1815

1916
import static org.testng.Assert.assertEquals;
2017

@@ -132,4 +129,15 @@ public void createVariant() {
132129

133130
assertEquals(var.get(0).toString(), expectedVariant.toString());
134131
}
132+
133+
@Test
134+
public void testValidateRefAllele(){
135+
List<String> alleles = Arrays.asList("A", "C", "G", "T", "N", "M", "R", "W", "S", "Y", "K", "V", "H", "D", "B");
136+
List<String> expected = Arrays.asList("A", "C", "G", "T", "N", "A", "A", "A", "C", "C", "G", "A", "A", "A", "C");
137+
List<String> validatedAlleles = new ArrayList<>();
138+
for (String allele: alleles) {
139+
validatedAlleles.add(toVarsBuilder.validateRefallele(allele));
140+
}
141+
assertEquals(validatedAlleles, expected);
142+
}
135143
}

0 commit comments

Comments
 (0)