Skip to content

Commit 4799fcd

Browse files
committed
Accept undetermined nucleotides encoded as N or - in sequence data (fixes GitHub issue #110)
1 parent b1f1772 commit 4799fcd

1 file changed

Lines changed: 3 additions & 2 deletions

File tree

R-pkg/R/43_data_read.R

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1089,7 +1089,7 @@ read_mss_data <- function(data_file, data_dir) {
10891089
microsat_hap_encoding <- "^[0-9]{3}$"
10901090
microsat_dip_encoding <- "^[0-9]{6}$"
10911091
microsat_x_encoding <- "^([0-9]{3}|[0-9]{6})$"
1092-
nucleotid_encoding <- "[ATCG]*"
1092+
nucleotid_encoding <- "[ATCGN\\-]*" # N and - for undetermined nucleotid
10931093
seq_hap_encoding <- str_c("^<\\[", nucleotid_encoding, "\\]>")
10941094
seq_dip_encoding <- str_c(
10951095
"^<\\[", nucleotid_encoding, "\\]\\[", nucleotid_encoding, "\\]>"
@@ -1229,7 +1229,8 @@ read_mss_data <- function(data_file, data_dir) {
12291229
return(
12301230
str_length(str_extract_all(
12311231
locus_data[row_ind,col_ind],
1232-
"\\[[ATCG]*\\]",
1232+
"\\[[ATCGN\\-]*\\]",
1233+
# N or - for an undetermined nucleotide
12331234
simplify = TRUE
12341235
)) - 2
12351236
)

0 commit comments

Comments
 (0)