Skip to content
This repository was archived by the owner on May 22, 2026. It is now read-only.

Commit db17547

Browse files
authored
Merge pull request #271 from PolinaBevad/fix_issue_265_pileup_missed_position
Fixed issue 265: missed position in pileup when insertion/complex occurs
2 parents 6ed76a0 + 530382f commit db17547

5 files changed

Lines changed: 76 additions & 48 deletions

src/main/java/com/astrazeneca/vardict/modules/ToVarsBuilder.java

Lines changed: 61 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -625,7 +625,7 @@ && getNonInsertionVariants().get(position + 1).containsKey(ref.get(position + 1)
625625
referenceForwardCoverage = tpref.varsCountOnForward;
626626
referenceReverseCoverage = tpref.varsCountOnReverse;
627627
}
628-
628+
List<Integer> positionsForChangedRefVariant = new ArrayList<>();
629629
// only reference reads are observed.
630630
if (variationsAtPos.variants.size() > 0) { //Condition: non-reference variants are found
631631
//Loop over non-reference variants
@@ -936,61 +936,75 @@ else if (deletionLength < instance().conf.SVMINLEN) {
936936
}
937937

938938
adjustVariantCounts(position, vref);
939-
940-
//Construct debug lines
941-
if (instance().conf.debug) {
942-
StringBuilder sb = new StringBuilder();
943-
for (String str : debugLines) {
944-
if (sb.length() > 0) {
945-
sb.append(" & ");
946-
}
947-
sb.append(str);
948-
}
949-
vref.DEBUG = sb.toString();
939+
if (startPosition != position && instance().conf.doPileup) {
940+
positionsForChangedRefVariant.add(position);
950941
}
942+
constructDebugLines(debugLines, vref);
951943
}
952944
//TODO: It is a "lazy" solution because current logic in realignment methods can't be changed simply for --nosv option
953945
if (instance().conf.disableSV) {
954946
variationsAtPos.variants.removeIf(vref -> ANY_SV.matcher(vref.varallele).find());
955947
}
956-
} else if (variationsAtPos.referenceVariant != null) {
957-
Variant vref = variationsAtPos.referenceVariant; //no variant reads are detected.
958-
vref.totalPosCoverage = totalPosCoverage;
959-
vref.positionCoverage = 0;
960-
vref.frequency = 0;
961-
vref.refForwardCoverage = referenceForwardCoverage;
962-
vref.refReverseCoverage = referenceReverseCoverage;
963-
vref.varsCountOnForward = 0;
964-
vref.varsCountOnReverse = 0;
965-
vref.msi = 0;
966-
vref.msint = 0;
967-
vref.strandBiasFlag += ";0";
968-
vref.shift3 = 0;
969-
vref.startPosition = position;
970-
vref.endPosition = position;
971-
vref.highQualityReadsFrequency = roundHalfEven("0.0000", vref.highQualityReadsFrequency);
972-
String referenceBase = ref.containsKey(position) ? ref.get(position).toString() : ""; // $r
973-
//both refallele and varallele are 1 base from reference string
974-
vref.refallele = validateRefallele(referenceBase);
975-
vref.varallele = validateRefallele(referenceBase);
976-
vref.genotype = referenceBase + "/" + referenceBase;
977-
vref.leftseq = "";
978-
vref.rightseq = "";
979-
vref.duprate = duprate;
980-
//Construct debug lines
981-
if (instance().conf.debug) {
982-
StringBuilder sb = new StringBuilder();
983-
for (String str : debugLines) {
984-
if (sb.length() > 0) {
985-
sb.append(" & ");
986-
}
987-
sb.append(str);
988-
}
989-
vref.DEBUG = sb.toString();
990-
}
948+
} else if (variationsAtPos.referenceVariant != null ) { //no variant reads are detected
949+
Variant vref = variationsAtPos.referenceVariant;
950+
updateRefVariant(position, totalPosCoverage, vref, debugLines,
951+
referenceForwardCoverage, referenceReverseCoverage);
991952
} else {
992953
variationsAtPos.referenceVariant = new Variant();
993954
}
955+
956+
// Update reference variants if there were indels and the start position was changed, so that after the update
957+
// ref variants can be output in pileup
958+
if (positionsForChangedRefVariant.contains(position) && variationsAtPos.referenceVariant != null) {
959+
Variant vref = variationsAtPos.referenceVariant;
960+
updateRefVariant(position, totalPosCoverage, vref, debugLines,
961+
referenceForwardCoverage, referenceReverseCoverage);
962+
}
963+
}
964+
965+
/**
* Overwrites the fields of a reference variant so that it describes a position
* without variant reads: variant-specific counters are zeroed, coverage values are
* copied from the arguments, and both alleles are set to the reference base found
* at {@code position}. The variant is modified in place; nothing is returned.
*
* @param position                 position stored as both start and end of the variant and
*                                 used as the key for the reference base lookup in {@code ref}
* @param totalPosCoverage         value stored in {@code vref.totalPosCoverage}
* @param vref                     the reference variant to update in place
* @param debugLines               debug strings forwarded to {@code constructDebugLines}
* @param referenceForwardCoverage value stored in {@code vref.refForwardCoverage}
* @param referenceReverseCoverage value stored in {@code vref.refReverseCoverage}
*/
private void updateRefVariant(int position, int totalPosCoverage, Variant vref, List<String> debugLines,
966+
int referenceForwardCoverage, int referenceReverseCoverage) {
967+
vref.totalPosCoverage = totalPosCoverage;
968+
// No variant reads are attributed to this record, so variant coverage and frequency are zeroed.
vref.positionCoverage = 0;
969+
vref.frequency = 0;
970+
vref.refForwardCoverage = referenceForwardCoverage;
971+
vref.refReverseCoverage = referenceReverseCoverage;
972+
vref.varsCountOnForward = 0;
973+
vref.varsCountOnReverse = 0;
974+
vref.msi = 0;
975+
vref.msint = 0;
976+
// Appends ";0" to the existing flag value; the previous content is kept, not overwritten.
vref.strandBiasFlag += ";0";
977+
vref.shift3 = 0;
978+
// The record covers exactly one position.
vref.startPosition = position;
979+
vref.endPosition = position;
980+
// The existing value is kept and only re-rounded with the "0.0000" pattern
// (presumably four decimal places -- confirm against roundHalfEven).
vref.highQualityReadsFrequency = roundHalfEven("0.0000", vref.highQualityReadsFrequency);
981+
// Falls back to an empty string when the reference map has no entry for this position.
String referenceBase = ref.containsKey(position) ? ref.get(position).toString() : ""; // $r
982+
//both refallele and varallele are 1 base from reference string
983+
vref.refallele = validateRefallele(referenceBase);
984+
vref.varallele = validateRefallele(referenceBase);
985+
// The genotype uses the raw reference base on both sides, not the validated allele.
vref.genotype = referenceBase + "/" + referenceBase;
986+
vref.leftseq = "";
987+
vref.rightseq = "";
988+
// duprate is not a parameter of this method; it is read from the enclosing scope.
vref.duprate = duprate;
989+
990+
constructDebugLines(debugLines, vref);
991+
}
992+
993+
/**
994+
* Construct DEBUG lines for the variant.
* <p>
* When {@code instance().conf.debug} is set, all entries of {@code debugLines} are
* concatenated in iteration order, separated by {@code " & "}, and the result is stored
* in {@code vref.DEBUG}. When debug is not set, {@code vref} is left unchanged.
*
* @param debugLines strings to join into the debug text
* @param vref       variant whose {@code DEBUG} field receives the joined text
995+
*/
996+
private void constructDebugLines(List<String> debugLines, Variant vref) {
997+
998+
if (instance().conf.debug) {
999+
StringBuilder sb = new StringBuilder();
1000+
for (String str : debugLines) {
1001+
// Add the separator only once the builder already holds text.
if (sb.length() > 0) {
1002+
sb.append(" & ");
1003+
}
1004+
sb.append(str);
1005+
}
1006+
vref.DEBUG = sb.toString();
1007+
}
9941008
}
9951009

9961010
/**

src/main/java/com/astrazeneca/vardict/postprocessmodules/SimplePostProcessModule.java

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ public void accept(Scope<AlignedVarsData> mapScope) {
4747
}
4848
}
4949

50-
if (variantsOnPosition != null && variantsOnPosition.variants.isEmpty()) {
50+
if (variantsOnPosition.variants.isEmpty()) {
5151
if (!conf.doPileup) {
5252
continue;
5353
}
@@ -70,6 +70,16 @@ public void accept(Scope<AlignedVarsData> mapScope) {
7070
continue;
7171
}
7272
}
73+
if (vref.startPosition != position && conf.doPileup && vvar.size() == 1) {
74+
Variant refVar = variantsOnPosition.referenceVariant;
75+
if (refVar == null) {
76+
SimpleOutputVariant outputVariant = new SimpleOutputVariant(refVar, mapScope.region, variantsOnPosition.sv, position);
77+
variantPrinter.print(outputVariant);
78+
refVar = new Variant();
79+
}
80+
refVar.vartype = "";
81+
vrefs.add(refVar);
82+
}
7383
vref.vartype = vref.varType();
7484
if (!vref.isGoodVar(variantsOnPosition.referenceVariant, vref.vartype, mapScope.splice)) {
7585
if (!conf.doPileup) {

testdata/integrationtestcases/Simple;hard_clip_case.fa;hard_clip_next_to_del_test1.bam;test;6674-6824;-f 0.0 -p -r 1.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ hard_clip_next_to_del_test1 testbed test 6768 6768 G G 1 0 1 0 0 0 G/G 0 0;0 20.
33
hard_clip_next_to_del_test1 testbed test 6769 6769 G G 1 0 1 0 0 0 G/G 0 0;0 19.0 0 90.0 0 60.0 2.000 1.0000 0 0 0 0 0 1 1 0 0 test:6675-6824 0 0
44
hard_clip_next_to_del_test1 testbed test 6770 6770 G G 1 0 1 0 0 0 G/G 0 0;0 18.0 0 90.0 0 60.0 2.000 1.0000 0 0 0 0 0 1 1 0 0 test:6675-6824 0 0
55
hard_clip_next_to_del_test1 testbed test 6771 6787 GCCTCTTGCCTGGCAGC CCTCTTGCCTGGCAGCA 1 1 0 0 1 0 CCTCTTGCCTGGCAGCACCTCTTGCCTGGCAGC/CCTCTTGCCTGGCAGCA 1.0000 0;0 1.0 0 90.0 0 60.0 2.000 1.0000 0 0 2.000 1 0 1 1 AGCTGGATATTGTGGCTGGG ACAAAGCGCAGAGGTCCCCT test:6675-6824 Complex 0 0
6+
hard_clip_next_to_del_test1 testbed test 6692 6692 C C 1 0 1 0 0 0 C/C 0 0;0 1.0 0 90.0 0 60.0 0 0 0 0 0 0 0 0 0 0 0 test:6675-6824 0 0
67
hard_clip_next_to_del_test1 testbed test 6693 6695 CTC TTCT 1 1 1 0 1 0 CCTC/+5 1.0000 0;0 1.0 0 90.0 0 60.0 2.000 1.0000 0 0 0 0 0 1 1 TGAGCAGAGGCAAGGCACCC TTACGGGAAGCCCTTCTGGC test:6675-6824 Complex 0 0
78
hard_clip_next_to_del_test1 testbed test 6696 6767 TTACGGGAAGCCCTTCTGGCGCTCACTCAGGGCAGCAGCTTCCCAAGCCAAGCCAAGCTGGATATTGTGGCT GGCGCTTCTGGCGCTCACTCAGGGCAGCAGCTTCCCAAGCCAAGCCAAGCTGGATATTGTGGCTG 1 1 0 0 1 0 -7GGCGCTTCTGGCGCTCACTCAGGGCAGCAGCTTCCCAAGCCAAGCCAAGCTGGATATTGTGGCTGAAGCCCTTCTGGCGCTCACTCAGGGCAGCAGCTTCCCAAGCCAAGCCAAGCTGGATATTGTGGCT/-7GGCGCTTCTGGCGCTCACTCAGGGCAGCAGCTTCCCAAGCCAAGCCAAGCTGGATATTGTGGCTG 1.0000 0;0 5.0 0 90.0 0 60.0 2.000 1.0000 0 0 3.000 1 0 1 1 GCAGAGGCAAGGCACCCCTC GGGGCCTCTTGCCTGGCAGC test:6675-6824 Complex 0 0
89

testdata/integrationtestcases/Simple;hard_clip_case.fa;hard_clip_next_to_del_test2.bam;test;6434-6564;-f 0.0 -p -r 1.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ hard_clip_next_to_del_test2 testbed test 6497 6497 C C 1 0 1 0 0 0 C/C 0 0;0 49.
1818
hard_clip_next_to_del_test2 testbed test 6498 6498 C C 1 0 1 0 0 0 C/C 0 0;0 48.0 0 90.0 0 60.0 2.000 1.0000 0 0 0 0 0 1 1 0 0 test:6434-6564 0 0
1919
hard_clip_next_to_del_test2 testbed test 6499 6499 C C 1 0 1 0 0 0 C/C 0 0;0 47.0 0 90.0 0 60.0 2.000 1.0000 0 0 0 0 0 1 1 0 0 test:6434-6564 0 0
2020
hard_clip_next_to_del_test2 testbed test 6500 6501 CT T 1 1 0 0 1 0 -1TCCACACTCCACAC/-1TCCACAC 1.0000 0;0 40.0 0 90.0 0 60.0 2.000 1.0000 0 0 5.000 1 0 1 1 CAACGCGTCTCTTTTTCCCC CCACACAAAATAAATGAAAA test:6434-6564 Complex 0 0
21+
hard_clip_next_to_del_test2 testbed test 6442 6442 A A 1 0 1 0 0 0 A/A 0 0;0 1.0 0 90.0 0 60.0 0 0 0 0 0 0 0 0 0 0 0 test:6434-6564 0 0
2122
hard_clip_next_to_del_test2 testbed test 6443 6447 ATGTG TTGTGA 1 1 1 0 1 0 AATGTG/+7 1.0000 0;0 1.0 0 90.0 0 60.0 2.000 1.0000 0 0 0 0 0 1 1 CTTTAGTACACAGTTAATAA AAGTTGTCCTGTGCCCTCCC test:6434-6564 Complex 0 0
2223
hard_clip_next_to_del_test2 testbed test 6508 6508 A A 1 0 1 0 0 0 A/A 0 0;0 39.0 0 90.0 0 60.0 2.000 1.0000 0 0 0 0 0 1 1 0 0 test:6434-6564 0 0
2324
hard_clip_next_to_del_test2 testbed test 6509 6509 A A 1 0 1 0 0 0 A/A 0 0;0 38.0 0 90.0 0 60.0 2.000 1.0000 0 0 0 0 0 1 1 0 0 test:6434-6564 0 0
@@ -29,5 +30,6 @@ hard_clip_next_to_del_test2 testbed test 6520 6520 A A 1 0 1 0 0 0 A/A 0 0;0 27.
2930
hard_clip_next_to_del_test2 testbed test 6521 6524 ATTT TTTA 1 1 0 0 1 0 TTTATTT/TTTA 1.0000 0;0 23.0 0 90.0 0 60.0 2.000 1.0000 0 2 3.000 1 0 1 1 TCCACACAAAATAAATGAAA AAAAGACAGATTTTTTTTTT test:6434-6564 Complex 0 0
3031
hard_clip_next_to_del_test2 testbed test 6525 6525 A A 1 0 1 0 0 0 A/A 0 0;0 22.0 0 90.0 0 60.0 2.000 1.0000 0 0 0 0 0 1 1 0 0 test:6434-6564 0 0
3132
hard_clip_next_to_del_test2 testbed test 6526 6526 A A 1 0 1 0 0 0 A/A 0 0;0 21.0 0 90.0 0 60.0 2.000 1.0000 0 0 0 0 0 1 1 0 0 test:6434-6564 0 0
33+
hard_clip_next_to_del_test2 testbed test 6527 6527 A A 1 0 1 0 0 0 A/A 0 0;0 20.0 0 90.0 0 60.0 2.000 1.0000 0 0 0 0 0 1 1 0 0 test:6434-6564 0 0
3234
hard_clip_next_to_del_test2 testbed test 6528 6529 AG GAC 1 1 1 0 1 0 AAG/+4 1.0000 0;0 17.0 0 90.0 0 60.0 2.000 1.0000 0 0 0 0 0 1 1 AAAATAAATGAAAATTTAAA ACAGATTTTTTTTTTTAGGA test:6434-6564 Complex 0 0
3335

testdata/integrationtestcases/Simple;mm10.fa;T7-75_S10_chr10_DEL.bam;chr10;11291400-11295300;-f 0.001;.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3944,6 +3944,7 @@ T7-75_S10_chr10_DEL testbed 10 11295243 11295243 T T 1203 0 632 571 0 0 T/T 0 2;
39443944
T7-75_S10_chr10_DEL testbed 10 11295236 11295236 C T 1165 1 621 543 1 0 C/T 0.0009 2;0 60.0 0 12.0 0 255.0 0 0 0 4 1.000 1 7.0 0 1146 GGTATGGCGATTCAGTACTG ATTTTGTTCTGTTAATGACT 10:11291401-11295300 SNV 0 0
39453945
T7-75_S10_chr10_DEL testbed 10 11295237 11295237 A G 1164 3 620 540 2 1 A/G 0.0026 2;2 27.0 1 17.3 1 255.0 0.500 0.0009 0.0009 3 4.000 1 1.3 1 1133 GTATGGCGATTCAGTACTGC TTTTGTTCTGTTAATGACTG 10:11291401-11295300 SNV 0 0
39463946
T7-75_S10_chr10_DEL testbed 10 11295237 11295237 A T 1164 1 620 540 1 0 A/T 0.0009 2;0 64.0 0 12.0 0 255.0 0 0 0 3 4.000 1 3.0 0 1133 GTATGGCGATTCAGTACTGC TTTTGTTCTGTTAATGACTG 10:11291401-11295300 SNV 0 0
3947+
T7-75_S10_chr10_DEL testbed 10 11295238 11295238 T T 1164 0 623 540 0 0 T/T 0 2;0 39.5 1 39.8 1 254.8 57.150 0.9991 0 0 0 0 0.4 1143 1144 0 0 10:11291401-11295300 0 0
39473948
T7-75_S10_chr10_DEL testbed 10 11295237 11295238 AT A 1164 1 623 540 1 0 T/-1 0.0009 2;0 42.0 0 41.0 0 255.0 2.000 0.0009 0 3 4.000 1 0 1 1144 GTATGGCGATTCAGTACTGC TTTGTTCTGTTAATGACTGG 10:11291401-11295300 Deletion 0 0
39483949
T7-75_S10_chr10_DEL testbed 10 11295239 11295239 T T 1165 0 624 541 0 0 T/T 0 2;0 39.6 1 39.8 1 254.8 54.476 1.0000 0 0 0 0 0.4 1144 1144 0 0 10:11291401-11295300 0 0
39493950
T7-75_S10_chr10_DEL testbed 10 11295232 11295232 A C 1093 1 614 477 1 0 A/C 0.0009 2;0 21.0 0 12.0 0 255.0 0 0 0 0 1.000 1 2.0 0 1064 TCTAGGTATGGCGATTCAGT CTGCATTTTGTTCTGTTAAT 10:11291401-11295300 SNV 0 0

0 commit comments

Comments
 (0)