Skip to content

Commit f7beb01

Browse files
authored
Add files via upload
1 parent 713333b commit f7beb01

14 files changed

Lines changed: 1005 additions & 0 deletions

A_Main.m

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
% Machine Learning Toolbox by Jingwei Too - 10/12/2020
2+
3+
% https://github.com/JingweiToo/Machine-Learning-Toolbox/tree/master
4+
5+
6+
%---Input-------------------------------------------------------------
7+
% feat : feature vector matrix (Instances x Features)
8+
% label : label matrix (Instances x 1)
9+
% opts : parameter settings
10+
% opts.tf    : validation scheme (1 = hold-out / 2 = k-fold / 3 = leave-one-out)
11+
% opts.ho : ratio of testing data in hold-out validation
12+
% opts.kfold : number of folds in k-fold cross-validation
13+
14+
15+
%---Output-------------------------------------------------------------
16+
% ML : machine learning model (It contains several results)
17+
% ML.acc : classification accuracy
18+
% ML.con : confusion matrix
19+
% ML.t : computational time (s)
20+
%----------------------------------------------------------------------
21+
22+
23+
%% Example 1: K-nearest neighbor (KNN) with k-fold cross-validation
% Start from a clean option struct so that fields set by another example
% (or by an earlier run of this section) cannot leak into this one
opts = struct();
% Parameter settings
opts.tf    = 2;    % 2 = k-fold cross-validation
opts.kfold = 10;   % number of folds
opts.k     = 5;    % k-value in KNN
% Load data (the script relies on this file to define feat and label)
load iris.mat;
% Classification
ML = jml('knn',feat,label,opts);
% Accuracy
accuracy = ML.acc;
% Confusion matrix
confmat  = ML.con;


%% Example 2: Multi-class support vector machine (MSVM) with hold-out validation
% Fresh option struct: opts.k / opts.kfold from Example 1 are not needed here
opts = struct();
% Parameter settings
opts.tf  = 1;      % 1 = hold-out validation
opts.ho  = 0.3;    % ratio of testing data
opts.fun = 'r';    % radial basis kernel function in SVM
% Load data (the script relies on this file to define feat and label)
load iris.mat;
% Classification
ML = jml('msvm',feat,label,opts);
% Accuracy
accuracy = ML.acc;
% Confusion matrix
confmat  = ML.con;


%% Example 3: Decision Tree (DT) with leave-one-out validation
% Fresh option struct: opts.fun / opts.ho from Example 2 are not needed here
opts = struct();
% Parameter settings
opts.tf     = 3;   % 3 = leave-one-out validation
opts.nSplit = 50;  % maximum number of splits in DT
% Load data (the script relies on this file to define feat and label)
load iris.mat;
% Classification
ML = jml('dt',feat,label,opts);
% Accuracy
accuracy = ML.acc;
% Confusion matrix
confmat  = ML.con;
65+
66+

jml.m

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
% Machine Learning toolbox by Jingwei Too - 10/12/2020
2+
3+
function ML = jml(type,feat,label,opts)
% JML  Dispatch to one of the toolbox classifiers and time the run.
%
%   ML = jml(type,feat,label,opts) picks the classifier routine that
%   corresponds to the short code TYPE, calls it as fun(feat,label,opts)
%   and adds the elapsed time of that call to the returned struct.
%
%   Input
%     type  : classifier code, one of
%             'gmm','knn','da','nb','msvm','svm','dt','rf','et'
%     feat  : passed unchanged to the selected routine
%     label : passed unchanged to the selected routine
%     opts  : option struct passed unchanged to the selected routine
%             (optional; an empty struct is used when omitted)
%
%   Output
%     ML    : struct returned by the selected routine, with the extra
%             field ML.t holding the elapsed time in seconds
%
%   An unrecognised TYPE raises the error 'jml:unknownType'.

% Allow the three-argument form jml(type,feat,label)
if nargin < 4
  opts = struct();
end

switch type
  case 'gmm'  ; fun = @mGaussianMixtureModel;
  case 'knn'  ; fun = @mKNearestNeighbor;
  case 'da'   ; fun = @mDiscriminateAnalysis;
  case 'nb'   ; fun = @mNaiveBayesECOC;
  case 'msvm' ; fun = @mMultiClassSupportVectorMachineECOC;
  case 'svm'  ; fun = @mSupportVectorMachine;
  case 'dt'   ; fun = @mDecisionTree;
  case 'rf'   ; fun = @mRandomForest;
  case 'et'   ; fun = @mEnsembleTree;
  otherwise
    % Without this branch an unknown code would only fail later with an
    % "undefined function or variable 'fun'" message
    if ischar(type)
      shown = type;
    else
      shown = ['<' class(type) '>'];
    end
    error('jml:unknownType', ...
      ['Unknown classifier type "%s". Valid types are: ', ...
       'gmm, knn, da, nb, msvm, svm, dt, rf, et.'], shown);
end

% Time only the classifier call itself
tic; ML = fun(feat,label,opts);
% Store
time = toc;
ML.t = time;

% The value is in seconds, so no percent sign belongs in the format
fprintf('\n Processing Time (s): %f \n',time); fprintf('\n');
end
22+
23+
24+

mDecisionTree.m

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
% Decision Tree (10/12/2020)
2+
3+
function DT = mDecisionTree(feat,label,opts)
% MDECISIONTREE  Train and validate a classification tree (fitctree).
%
%   DT = mDecisionTree(feat,label,opts)
%
%   Input
%     feat  : feature matrix, one row per observation
%     label : label vector, one entry per row of feat (the accuracy is
%             computed with ==, so labels must support that operator)
%     opts  : struct of optional settings
%       opts.tf     : validation scheme, 1 = hold-out, 2 = k-fold,
%                     3 = leave-one-out (default 2)
%       opts.ho     : ratio of testing data for hold-out (default 0.3)
%       opts.kfold  : number of folds for k-fold (default 10)
%       opts.nSplit : 'MaxNumSplits' passed to fitctree (default 50)
%
%   Output
%     DT.acc : mean of the per-fold accuracies (fraction in [0,1])
%     DT.con : confusionmat(true,predicted) over all test observations
%
%   An opts.tf other than 1, 2 or 3 raises
%   'mDecisionTree:invalidValidation'.

% Default
num_split = 50;
kfold     = 10;
ho        = 0.3;   % previously undefined when opts.ho was not supplied
tf        = 2;

if isfield(opts,'kfold'),  kfold     = opts.kfold;  end
if isfield(opts,'ho'),     ho        = opts.ho;     end
if isfield(opts,'tf'),     tf        = opts.tf;     end
if isfield(opts,'nSplit'), num_split = opts.nSplit; end

% Reject unknown schemes up front instead of failing later on
% undefined result variables
if ~isscalar(tf) || ~any(tf == [1 2 3])
  error('mDecisionTree:invalidValidation', ...
    'opts.tf must be 1 (hold-out), 2 (k-fold) or 3 (leave-one-out).');
end

if tf == 1
  % [Hold-out]
  fold   = cvpartition(label,'HoldOut',ho);
  % Call train & test data
  xtrain = feat(fold.training,:); ytrain = label(fold.training);
  xtest  = feat(fold.test,:);     ytest2 = label(fold.test);
  % Perform decision tree
  Model  = fitctree(xtrain,ytrain,'MaxNumSplits',num_split);
  % Test
  pred2  = predict(Model,xtest);
  % Accuracy
  Afold  = sum(pred2 == ytest2) / length(ytest2);
else
  % [Cross-validation] and [Leave one out] share the same loop and
  % differ only in how the partition is built
  if tf == 2
    fold = cvpartition(label,'KFold',kfold);
  else
    % Documented leave-one-out form takes the number of observations
    fold = cvpartition(numel(label),'LeaveOut');
  end
  num_sets = fold.NumTestSets;
  Afold    = zeros(num_sets,1);
  pred2    = [];
  ytest2   = [];
  for i = 1:num_sets
    % Call train & test data
    trainIdx = fold.training(i); testIdx = fold.test(i);
    xtrain   = feat(trainIdx,:); ytrain  = label(trainIdx);
    xtest    = feat(testIdx,:);  ytest   = label(testIdx);
    % Perform decision tree
    Model    = fitctree(xtrain,ytrain,'MaxNumSplits',num_split);
    % Test
    pred     = predict(Model,xtest);
    % Accuracy
    Afold(i) = sum(pred == ytest) / length(ytest);
    % Pool predictions and ground truth for the confusion matrix
    pred2    = [pred2; pred];    %#ok<AGROW>
    ytest2   = [ytest2; ytest];  %#ok<AGROW>
  end
end

% Confusion matrix
confmat = confusionmat(ytest2,pred2);
% Overall accuracy
acc     = mean(Afold);
% Store results
DT.acc  = acc;
DT.con  = confmat;

if tf == 1
  fprintf('\n Accuracy (DT-HO): %g %%',100 * acc);
elseif tf == 2
  fprintf('\n Accuracy (DT-CV): %g %%',100 * acc);
else
  fprintf('\n Accuracy (DT-LOO): %g %%',100 * acc);
end
end
90+

mDiscriminateAnalysis.m

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
% Discriminant analysis (10/12/2020)
2+
3+
function DA = mDiscriminateAnalysis(feat,label,opts)
% MDISCRIMINATEANALYSIS  Train and validate a discriminant analysis
% classifier (fitcdiscr).
%
%   DA = mDiscriminateAnalysis(feat,label,opts)
%
%   Input
%     feat  : feature matrix, one row per observation
%     label : label vector, one entry per row of feat (the accuracy is
%             computed with ==, so labels must support that operator)
%     opts  : struct of optional settings
%       opts.tf    : validation scheme, 1 = hold-out, 2 = k-fold,
%                    3 = leave-one-out (default 2)
%       opts.ho    : ratio of testing data for hold-out (default 0.3)
%       opts.kfold : number of folds for k-fold (default 10)
%       opts.fun   : discriminant type code (default 'l'):
%                    'l'  linear          'q'  quadratic
%                    'pl' pseudolinear    'pq' pseudoquadratic
%                    'dl' diaglinear      'dq' diagquadratic
%                    Any other value is handed to fitcdiscr unchanged.
%
%   Output
%     DA.acc : mean of the per-fold accuracies (fraction in [0,1])
%     DA.con : confusionmat(true,predicted) over all test observations
%
%   An opts.tf other than 1, 2 or 3 raises
%   'mDiscriminateAnalysis:invalidValidation'.

% Default
fun   = 'l';
kfold = 10;
ho    = 0.3;   % previously undefined when opts.ho was not supplied
tf    = 2;

if isfield(opts,'kfold'), kfold = opts.kfold; end
if isfield(opts,'ho'),    ho    = opts.ho;    end
if isfield(opts,'tf'),    tf    = opts.tf;    end
if isfield(opts,'fun'),   fun   = opts.fun;   end

% Expand the short code into the 'DiscrimType' value; unmatched values
% fall through untouched
switch fun
  case 'l' ; fun = 'linear';
  case 'pq'; fun = 'pseudoquadratic';
  case 'q' ; fun = 'quadratic';
  case 'dl'; fun = 'diaglinear';
  case 'pl'; fun = 'pseudolinear';
  case 'dq'; fun = 'diagquadratic';
end

% Reject unknown schemes up front instead of failing later on
% undefined result variables
if ~isscalar(tf) || ~any(tf == [1 2 3])
  error('mDiscriminateAnalysis:invalidValidation', ...
    'opts.tf must be 1 (hold-out), 2 (k-fold) or 3 (leave-one-out).');
end

if tf == 1
  % [Hold-out]
  fold     = cvpartition(label,'HoldOut',ho);
  % Call train & test data
  xtrain   = feat(fold.training,:); ytrain = label(fold.training);
  xtest    = feat(fold.test,:);     ytest2 = label(fold.test);
  % Train model
  My_Model = fitcdiscr(xtrain,ytrain,'DiscrimType',fun);
  % Test
  pred2    = predict(My_Model,xtest);
  % Accuracy
  Afold    = sum(pred2 == ytest2) / length(ytest2);
else
  % [Cross-validation] and [Leave one out] share the same loop and
  % differ only in how the partition is built
  if tf == 2
    fold = cvpartition(label,'KFold',kfold);
  else
    % Documented leave-one-out form takes the number of observations
    fold = cvpartition(numel(label),'LeaveOut');
  end
  num_sets = fold.NumTestSets;
  Afold    = zeros(num_sets,1);
  pred2    = [];
  ytest2   = [];
  for i = 1:num_sets
    % Call train & test data
    trainIdx = fold.training(i); testIdx = fold.test(i);
    xtrain   = feat(trainIdx,:); ytrain  = label(trainIdx);
    xtest    = feat(testIdx,:);  ytest   = label(testIdx);
    % Train model
    My_Model = fitcdiscr(xtrain,ytrain,'DiscrimType',fun);
    % Test
    pred     = predict(My_Model,xtest);
    % Accuracy
    Afold(i) = sum(pred == ytest) / length(ytest);
    % Pool predictions and ground truth for the confusion matrix
    pred2    = [pred2; pred];    %#ok<AGROW>
    ytest2   = [ytest2; ytest];  %#ok<AGROW>
  end
end

% Confusion matrix
confmat = confusionmat(ytest2,pred2);
% Overall accuracy
acc     = mean(Afold);
% Store
DA.acc  = acc;
DA.con  = confmat;

if tf == 1
  fprintf('\n Accuracy (DA-HO): %g %%',100 * acc);
elseif tf == 2
  fprintf('\n Accuracy (DA-CV): %g %%',100 * acc);
else
  fprintf('\n Accuracy (DA-LO): %g %%',100 * acc);
end
end
99+

mEnsembleTree.m

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
% Ensemble Tree (10/12/2020)
2+
3+
function ET = mEnsembleTree(feat,label,opts)
% MENSEMBLETREE  Train and validate an AdaBoost ensemble of
% classification trees (fitcensemble).
%
%   ET = mEnsembleTree(feat,label,opts)
%
%   Input
%     feat  : feature matrix, one row per observation
%     label : label vector, one entry per row of feat (the accuracy is
%             computed with ==, so labels must support that operator)
%     opts  : struct of optional settings
%       opts.tf     : validation scheme, 1 = hold-out, 2 = k-fold,
%                     3 = leave-one-out (default 2)
%       opts.ho     : ratio of testing data for hold-out (default 0.3)
%       opts.kfold  : number of folds for k-fold (default 10)
%       opts.nSplit : 'MaxNumSplits' of each tree learner (default 50)
%
%   Output
%     ET.acc : mean of the per-fold accuracies (fraction in [0,1])
%     ET.con : confusionmat(true,predicted) over all test observations
%
%   An opts.tf other than 1, 2 or 3 raises
%   'mEnsembleTree:invalidValidation'.

% Default
num_split = 50;
kfold     = 10;
ho        = 0.3;   % previously undefined when opts.ho was not supplied
tf        = 2;

if isfield(opts,'kfold'),  kfold     = opts.kfold;  end
if isfield(opts,'ho'),     ho        = opts.ho;     end
if isfield(opts,'tf'),     tf        = opts.tf;     end
if isfield(opts,'nSplit'), num_split = opts.nSplit; end

% Reject unknown schemes up front instead of failing later on
% undefined result variables
if ~isscalar(tf) || ~any(tf == [1 2 3])
  error('mEnsembleTree:invalidValidation', ...
    'opts.tf must be 1 (hold-out), 2 (k-fold) or 3 (leave-one-out).');
end

if tf == 1
  % [Hold-out]
  fold   = cvpartition(label,'HoldOut',ho);
  % Call train & test data
  xtrain = feat(fold.training,:); ytrain = label(fold.training);
  xtest  = feat(fold.test,:);     ytest2 = label(fold.test);
  % Train model
  Model  = trainBoostedTrees(xtrain,ytrain,num_split);
  % Test
  pred2  = predict(Model,xtest);
  % Accuracy
  Afold  = sum(pred2 == ytest2) / length(ytest2);
else
  % [Cross-validation] and [Leave one out] share the same loop and
  % differ only in how the partition is built
  if tf == 2
    fold = cvpartition(label,'KFold',kfold);
  else
    % Documented leave-one-out form takes the number of observations
    fold = cvpartition(numel(label),'LeaveOut');
  end
  num_sets = fold.NumTestSets;
  Afold    = zeros(num_sets,1);
  pred2    = [];
  ytest2   = [];
  for i = 1:num_sets
    % Call train & test data
    trainIdx = fold.training(i); testIdx = fold.test(i);
    xtrain   = feat(trainIdx,:); ytrain  = label(trainIdx);
    xtest    = feat(testIdx,:);  ytest   = label(testIdx);
    % Train model
    Model    = trainBoostedTrees(xtrain,ytrain,num_split);
    % Test
    pred     = predict(Model,xtest);
    % Accuracy
    Afold(i) = sum(pred == ytest) / length(ytest);
    % Pool predictions and ground truth for the confusion matrix
    pred2    = [pred2; pred];    %#ok<AGROW>
    ytest2   = [ytest2; ytest];  %#ok<AGROW>
  end
end

% Confusion matrix
confmat = confusionmat(ytest2,pred2);
% Overall accuracy
acc     = mean(Afold);
% Store
ET.acc  = acc;
ET.con  = confmat;

if tf == 1
  fprintf('\n Accuracy (ET-HO): %g %%',100 * acc);
elseif tf == 2
  fprintf('\n Accuracy (ET-CV): %g %%',100 * acc);
else
  fprintf('\n Accuracy (ET-LOO): %g %%',100 * acc);
end
end

function Model = trainBoostedTrees(xtrain,ytrain,num_split)
% Fit an AdaBoost ensemble of trees limited to num_split splits each.
% fitcensemble documents 'AdaBoostM1' as binary-only and 'AdaBoostM2' as
% multiclass-only, so the method follows the number of classes that are
% present in the training labels.
if numel(unique(ytrain)) > 2
  method = 'AdaBoostM2';
else
  method = 'AdaBoostM1';
end
Temp  = templateTree('MaxNumSplits',num_split);
Model = fitcensemble(xtrain,ytrain,'Learners',Temp,...
  'Method',method);
end
96+

0 commit comments

Comments
 (0)