Skip to content
This repository was archived by the owner on May 22, 2026. It is now read-only.

Commit 03eb543

Browse files
authored
Merge pull request #291 from PolinaBevad/issue_285_and_288
Issues 285 and 288 fixes
2 parents b772179 + 73781ad commit 03eb543

18 files changed

Lines changed: 202 additions & 69 deletions

VarDict

src/main/java/com/astrazeneca/vardict/modules/ToVarsBuilder.java

Lines changed: 40 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,13 @@
3030
* and variants in region.
3131
*/
3232
public class ToVarsBuilder implements Module<RealignedVariationData, AlignedVarsData> {
33+
// Left 20 and right 20 bases in reference to output in VCF file
34+
public static final int REF_20_BASES = 20;
35+
// Number of bases where to check for MSIs
36+
public static final int REF_30_BASES = 30;
37+
public static final int REF_50_BASES = 50;
38+
public static final int REF_70_BASES = 70;
39+
3340
private Region region;
3441
private Map<Integer, Integer> refCoverage;
3542
private Map<Integer, VariationMap<String, Variation>> insertionVariants;
@@ -196,10 +203,10 @@ private boolean isTheSameVariationOnRef(int position, VariationMap<String, Varia
196203
*/
197204
private MSI proceedVrefIsDeletion(int position, int dellen) {
198205
//left 70 bases in reference sequence
199-
String leftseq = joinRef(ref, (position - 70 > 1 ? position - 70 : 1), position - 1); // left 10 nt
206+
String leftseq = joinRef(ref, (Math.max(position - REF_70_BASES, 1)), position - 1); // left 10 nt
200207
int chr0 = getOrElse(instance().chrLengths, region.chr, 0);
201208
//right 70 + dellen bases in reference sequence
202-
String tseq = joinRef(ref, position, position + dellen + 70 > chr0 ? chr0 : position + dellen + 70);
209+
String tseq = joinRef(ref, position, Math.min(position + dellen + REF_70_BASES, chr0));
203210

204211
//Try to adjust for microsatellite instability
205212
MSI msiData = findMSI(substr(tseq, 0, dellen), substr(tseq, dellen), leftseq);
@@ -223,7 +230,7 @@ private MSI proceedVrefIsDeletion(int position, int dellen) {
223230

224231
/**
225232
* For insertion variant calculates shift3 (number of bases to be shifted to 3' for deletions due to alternative alignment),
226-
* msi (which indicates Microsatellite instability) and msint (MicroSattelite unit length in base pairs) fields.
233+
* msi (which indicates Microsatellite instability) and msint (MicroSatellite unit length in base pairs) fields.
227234
* @param position position to seek in reference to get left sequence of variant
228235
* @param vn variant description string
229236
* @return tuple of msi, shift3 and msint.
@@ -232,10 +239,10 @@ private MSI proceedVrefIsInsertion(int position, String vn) {
232239
//variant description string without first symbol '+'
233240
String tseq1 = vn.substring(1);
234241
//left 50 bases in reference sequence
235-
String leftseq = joinRef(ref, position - 50 > 1 ? position - 50 : 1, position); // left 10 nt
242+
String leftseq = joinRef(ref, Math.max(position - REF_50_BASES, 1), position); // left 10 nt
236243
int x = getOrElse(instance().chrLengths, region.chr, 0);
237244
//right 70 bases in reference sequence
238-
String tseq2 = joinRef(ref, position + 1, (position + 70 > x ? x : position + 70));
245+
String tseq2 = joinRef(ref, position + 1, (Math.min(position + REF_70_BASES, x)));
239246

240247
MSI msiData = findMSI(tseq1, tseq2, leftseq);
241248
double msi = msiData.msi;
@@ -741,9 +748,9 @@ else if (deletionLength < instance().conf.SVMINLEN) {
741748
}
742749
} else { //Not insertion/deletion variant. SNP or MNP
743750
//Find MSI adjustment
744-
String tseq1 = joinRef(ref, position - 30 > 1 ? position - 30 : 1, position + 1);
751+
String tseq1 = joinRef(ref, Math.max(position - REF_30_BASES, 1), position + 1);
745752
int chr0 = getOrElse(instance().chrLengths, region.chr, 0);
746-
String tseq2 = joinRef(ref, position + 2, position + 70 > chr0 ? chr0 : position + 70);
753+
String tseq2 = joinRef(ref, position + 2, Math.min(position + REF_70_BASES, chr0));
747754

748755
MSI msiData = findMSI(tseq1, tseq2, null);
749756
msi = msiData.msi;
@@ -856,9 +863,7 @@ else if (deletionLength < instance().conf.SVMINLEN) {
856863
if (cutSite != 0 && (refallele.length() != varallele.length() && substr(refallele, 0, 1).equals(substr(varallele, 0, 1)))) {
857864
if (!(startPosition == cutSite || endPosition == cutSite)) {
858865
int n = 0;
859-
int dis = abs(cutSite - startPosition) < abs(cutSite - endPosition)
860-
? abs(cutSite - startPosition)
861-
: abs(cutSite - endPosition);
866+
int dis = Math.min(abs(cutSite - startPosition), abs(cutSite - endPosition));
862867
if (startPosition < cutSite) {
863868
while(startPosition + n < cutSite && n < shift3 && endPosition + n != cutSite) {
864869
n++;
@@ -899,10 +904,10 @@ else if (deletionLength < instance().conf.SVMINLEN) {
899904
}
900905

901906
//preceding reference sequence
902-
vref.leftseq = joinRef(ref, startPosition - 20 < 1 ? 1 : startPosition - 20, startPosition - 1); // left 20 nt
907+
vref.leftseq = joinRef(ref, Math.max(startPosition - REF_20_BASES, 1), startPosition - 1); // left 20 nt
903908
int chr0 = getOrElse(instance().chrLengths, region.chr, 0);
904909
//following reference sequence
905-
vref.rightseq = joinRef(ref, endPosition + 1, endPosition + 20 > chr0 ? chr0 : endPosition + 20); // right 20 nt
910+
vref.rightseq = joinRef(ref, endPosition + 1, Math.min(endPosition + REF_20_BASES, chr0)); // right 20 nt
906911
//genotype description string
907912
String genotype = genotype1current + "/" + genotype2;
908913
//remove '&' and '#' symbols from genotype string
@@ -960,6 +965,29 @@ else if (deletionLength < instance().conf.SVMINLEN) {
960965
updateRefVariant(position, totalPosCoverage, vref, debugLines,
961966
referenceForwardCoverage, referenceReverseCoverage);
962967
}
968+
969+
if (instance().conf.doPileup && variationsAtPos.referenceVariant != null) {
970+
Variant refVar = variationsAtPos.referenceVariant;
971+
fillReferenceVarInPileup(position, refVar);
972+
}
973+
}
974+
975+
/**
976+
* Fill reference variant fields for pileup: positions, varallele and refallele
977+
* @param position current position in reference
978+
* @param refVar reference Variant
979+
*/
980+
private void fillReferenceVarInPileup(int position, Variant refVar) {
981+
refVar.startPosition = position;
982+
refVar.endPosition = position;
983+
984+
char emptyChar = 0;
985+
if (refVar.refallele.equals("")) {
986+
refVar.refallele = ref.getOrDefault(position, emptyChar).toString();
987+
}
988+
if (refVar.varallele.equals("")) {
989+
refVar.varallele = ref.getOrDefault(position, emptyChar).toString();
990+
}
963991
}
964992

965993
private void updateRefVariant(int position, int totalPosCoverage, Variant vref, List<String> debugLines,

src/main/java/com/astrazeneca/vardict/modules/VariationRealigner.java

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2452,10 +2452,17 @@ void adjRefFactor(Variation ref, double factor_f) {
24522452
ref.varsCount -= (int) (factor_f * ref.varsCount);
24532453
ref.highQualityReadsCount -= (int) (factor_f * ref.highQualityReadsCount);
24542454
ref.lowQualityReadsCount -= (int) (factor_f * ref.lowQualityReadsCount);
2455+
2456+
// Adjust mean mapping quality, mean quality and mean position only on number of changed counts
24552457
double factorCnt = oldVarsCount != 0 ? Math.abs((ref.varsCount - oldVarsCount)) / (double) oldVarsCount : 1;
2456-
ref.meanPosition -= ref.meanPosition * factor_f * factorCnt;
2457-
ref.meanQuality -= ref.meanQuality * factor_f * factorCnt;
2458-
ref.meanMappingQuality -= ref.meanMappingQuality * factor_f * factorCnt;
2458+
// Factors must be the same sign
2459+
if (factor_f < 0 && factorCnt > 0 || factor_f > 0 && factorCnt < 0){
2460+
factorCnt = -factorCnt;
2461+
}
2462+
ref.meanPosition -= ref.meanPosition * factorCnt;
2463+
ref.meanQuality -= ref.meanQuality * factorCnt;
2464+
ref.meanMappingQuality -= ref.meanMappingQuality * factorCnt;
2465+
24592466
ref.numberOfMismatches -= factor_f * ref.numberOfMismatches;
24602467
ref.varsCountOnForward -= (int) (factor_f * ref.varsCountOnForward);
24612468
ref.varsCountOnReverse -= (int) (factor_f * ref.varsCountOnReverse);
Binary file not shown.
Binary file not shown.

testdata/fastas/GRCh37.fa.csv

Lines changed: 2 additions & 1 deletion
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)