-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path3.1.3_Training_files_split_70_30.R
More file actions
47 lines (28 loc) · 1.6 KB
/
3.1.3_Training_files_split_70_30.R
File metadata and controls
47 lines (28 loc) · 1.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#NORTH AMERICA SOIL MOISTURE DATASET DERIVED FROM TIME-SPECIFIC ADAPTABLE MODELS
#3. Generation of Biweekly Training and Prediction matrices
###3.1.3 North America biweekly training matrices split
#This code randomly splits North America training files in 70% and 30 %
#for training the models and for later test.
##########
setwd("E:/3_North_America_SM_predictions")
##########
years <- c("2002","2003","2004","2005","2006","2007","2008","2009","2010",
"2011","2012","2013","2014","2015","2016","2017","2018","2019","2020")
biweeks <- c("01","02","03","04","05","06","07","08","09","10","11","12","13","14","15","16","17","18","19","20","21","22","23")
training_files <- list.files(path = "./3_Training_and_Test_data_csv/1_northamerica",
pattern = ".csv", recursive = TRUE, full.names = TRUE)
for (p in 1:length(training_files)) {
x <- read.csv(training_files[p])
biweek_name <- substr(training_files[p], 82, 88)
## 70% of the sample size
smp_size <- floor(0.70 * nrow(x))
## set the seed to make your partition reproducible
train_ind <- sample(seq_len(nrow(x)), size = smp_size)
train <- x[train_ind, ]
test <- x[-train_ind, ]
write.csv(train, paste0("./3_Training_and_Test_data_csv/2_northamerica_train_70pct/northamerica_train_v71_250m_70pct_",
biweek_name, ".csv"), row.names = FALSE)
write.csv(test, paste0("./3_Training_and_Test_data_csv/3_northamerica_test_30pct/northamerica_test_v71_250m_30pct_",
biweek_name, ".csv"), row.names = FALSE)
print(paste0(p, " ", biweek_name))
}