Skip to content

Commit 823f23c

Browse files
author
Zeyi Wen
authored
Merge pull request #3 from shijiashuai/master
added multi-class training and prediction
2 parents ee9c285 + 6787a23 commit 823f23c

14 files changed

Lines changed: 997 additions & 575 deletions

Makefile

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@ NVCC := /usr/local/cuda/bin/nvcc
1010
# Output directory name; it is created by the assignment below.
ODIR = bin
1111
# ":=" expands immediately, so the $(shell mkdir -p ...) call runs while make is
# reading this file and the directory exists before any rule executes.
# The variable itself only exists to trigger that side effect.
dummy_build_folder := $(shell mkdir -p $(ODIR))
1212

13-
# Link the mascot executable.
# The object list is written exactly once (as the prerequisites) and the recipe
# links "$^" (all prerequisites) into "$@" (the target), so adding a new object
# file only requires touching this list. The objects are listed in the order the
# previous hand-written link command used, so the linker invocation is unchanged.
bin/mascot: cacheGS.o cacheLRU.o cacheMLRU.o cacheMRU.o DataIO.o baseLibsvmReader.o ReadHelper.o \
            baseHessian_cu.o accessHessian.o parAccessor.o seqAccessor.o svmProblem.o deviceHessian_cu.o \
            LinearCalculater_cu.o LinearCalGPUHelper_cu.o PolynomialCalGPUHelper_cu.o PolynomialCalculater_cu.o \
            RBFCalculater_cu.o RBFCalGPUHelper_cu.o SigmoidCalculater_cu.o SigmoidCalGPUHelper_cu.o \
            devUtility_cu.o storageManager_cu.o hostStorageManager.o classificationKernel_cu.o \
            commandLineParser.o cvFunction.o fileOps.o gpu_global_utility.o initCuda_cu.o smoGPUHelper_cu.o \
            smoSharedSolver_cu.o smoSolver_cu.o svmMain.o svmPredictor_cu.o svmSharedTrainer_cu.o \
            svmTrainer_cu.o modelSelector_cu.o trainingFunction_cu.o svmModel_cu.o
	$(NVCC) $(LASTFLAG) $(LDFLAGS) -o $@ $^
1515
# Compile mascot/cvFunction.cpp with the host compiler.
# $@ is the target (cvFunction.o); $< is the first prerequisite (the .cpp file).
cvFunction.o: mascot/cvFunction.cpp
	g++ $(CCFLAGS) $(LDFLAGS) -o $@ -c $<
1717

@@ -87,6 +87,12 @@ DataIO.o: mascot/DataIOOps/DataIO.h mascot/DataIOOps/DataIO.cpp
8787
# Compile mascot/DataIOOps/ReadHelper.cpp with the host compiler.
# $@ is the target (ReadHelper.o); $< is the first prerequisite (the .cpp file).
ReadHelper.o: mascot/DataIOOps/ReadHelper.cpp
	g++ $(CCFLAGS) -o $@ -c $<
8989

90+
# Compile mascot/svmProblem.cpp with the host compiler.
# $@ is the target (svmProblem.o); $< is the first prerequisite (the .cpp file).
svmProblem.o: mascot/svmProblem.cpp
	g++ $(CCFLAGS) -o $@ -c $<
92+
93+
# Compile mascot/svmModel.cu with nvcc.
# $@ is the target (svmModel_cu.o); $< is the first prerequisite (the .cu file).
svmModel_cu.o: mascot/svmModel.cu
	$(NVCC) $(NVCCFLAGS) $(LDFLAGS) -o $@ -c $<
95+
9096
# Compile svm-shared/HessianIO/baseHessian.cu with nvcc; the two headers are listed
# only so that a header change triggers a rebuild.
# The first prerequisite is a header, so "$<" would be wrong here: the single .cu
# source is picked out of the prerequisite list with $(filter ...).
baseHessian_cu.o: svm-shared/HessianIO/baseHessian.h svm-shared/HessianIO/baseHessian.cu svm-shared/host_constant.h
	$(NVCC) $(NVCCFLAGS) $(LDFLAGS) -o $@ -c $(filter %.cu,$^)
9298

mascot/DataIOOps/DataIO.cpp

Lines changed: 44 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -12,94 +12,59 @@
1212
using std::cout;
1313
using std::endl;
1414

15-
/*
 * @brief: read a labelled data set from a file
 *
 * Parsing is delegated to CReadHelper::ReadLibSVMMultiClassData; this function
 * only resets the label container, invokes the reader and prints a summary.
 *
 * @param strFileName:   name of the input file, forwarded to the reader
 * @param nNumofFeature: number of features, forwarded to the reader
 * @param v_vSampleData: output container for the samples (not cleared here before reading)
 * @param v_nLabel:      output container for the labels (cleared here before reading)
 * @return: always true; the reader returns void, so no failure can be reported from here
 */
bool CDataIOOps::ReadFromFile(string strFileName, int nNumofFeature, vector<vector<float_point> > &v_vSampleData,
                              vector<int> &v_nLabel) {
    v_nLabel.clear();
    cout << "reading multi-class data..." << endl;

    //read data from file
    CReadHelper::ReadLibSVMMultiClassData(v_vSampleData, v_nLabel, strFileName, nNumofFeature);

    //size() yields size_t; printing it through printf's %d is undefined behaviour on
    //platforms where size_t is wider than int, so the summary is streamed instead
    cout << "dataset size:" << v_vSampleData.size() << ", #feature:" << nNumofFeature << endl;
    return true;
}
5325

5426
/*
5527
* @brief: uniformly distribute positive and negative samples
5628
*/
5729
bool CDataIOOps::OrganizeSamples(vector<vector<float_point> > &v_vPosSample, vector<vector<float_point> > &v_vNegSample,
58-
vector<vector<float_point> > &v_vAllSample, vector<int> &v_nLabel)
59-
{
60-
//merge two sets of samples into one
61-
int nSizeofPSample = v_vPosSample.size();
62-
int nSizeofNSample = v_vNegSample.size();
63-
double dRatio = ((double)nSizeofPSample) / nSizeofNSample;
30+
vector<vector<float_point> > &v_vAllSample, vector<int> &v_nLabel) {
31+
//merge two sets of samples into one
32+
int nSizeofPSample = v_vPosSample.size();
33+
int nSizeofNSample = v_vNegSample.size();
34+
double dRatio = ((double) nSizeofPSample) / nSizeofNSample;
6435

65-
//put samples in a uniform way. This is to avoid the training set only having one class, during n-fold-cross-validation
66-
int nNumofPosInEachPart = 0;
67-
int nNumofNegInEachPart = 0;
68-
if(dRatio < 1)
69-
{
70-
nNumofPosInEachPart = 1;
71-
nNumofNegInEachPart = int(1.0 / dRatio);
72-
}
73-
else
74-
{
75-
nNumofPosInEachPart = (int)dRatio;
76-
nNumofNegInEachPart = 1;
77-
}
36+
//put samples in a uniform way. This is to avoid the training set only having one class, during n-fold-cross-validation
37+
int nNumofPosInEachPart = 0;
38+
int nNumofNegInEachPart = 0;
39+
if (dRatio < 1) {
40+
nNumofPosInEachPart = 1;
41+
nNumofNegInEachPart = int(1.0 / dRatio);
42+
} else {
43+
nNumofPosInEachPart = (int) dRatio;
44+
nNumofNegInEachPart = 1;
45+
}
7846

79-
vector<vector<float_point> >::iterator itPositive = v_vPosSample.begin();
80-
vector<vector<float_point> >::iterator itNegative = v_vNegSample.begin();
81-
int nCounter = 0;
82-
while(itPositive != v_vPosSample.end() || itNegative != v_vNegSample.end())
83-
{
84-
for(int i = 0; i < nNumofPosInEachPart && itPositive != v_vPosSample.end(); i++)
85-
{
86-
nCounter++;
87-
v_vAllSample.push_back(*itPositive);
88-
v_nLabel.push_back(1);
89-
itPositive++;
90-
}
47+
vector<vector<float_point> >::iterator itPositive = v_vPosSample.begin();
48+
vector<vector<float_point> >::iterator itNegative = v_vNegSample.begin();
49+
int nCounter = 0;
50+
while (itPositive != v_vPosSample.end() || itNegative != v_vNegSample.end()) {
51+
for (int i = 0; i < nNumofPosInEachPart && itPositive != v_vPosSample.end(); i++) {
52+
nCounter++;
53+
v_vAllSample.push_back(*itPositive);
54+
v_nLabel.push_back(1);
55+
itPositive++;
56+
}
9157

92-
for(int i = 0; i < nNumofNegInEachPart && itNegative != v_vNegSample.end(); i++)
93-
{
94-
nCounter++;
95-
v_vAllSample.push_back(*itNegative);
96-
v_nLabel.push_back(-1);
97-
itNegative++;
98-
}
99-
}
100-
v_vPosSample.clear();
101-
v_vNegSample.clear();
102-
return true;
58+
for (int i = 0; i < nNumofNegInEachPart && itNegative != v_vNegSample.end(); i++) {
59+
nCounter++;
60+
v_vAllSample.push_back(*itNegative);
61+
v_nLabel.push_back(-1);
62+
itNegative++;
63+
}
64+
}
65+
v_vPosSample.clear();
66+
v_vNegSample.clear();
67+
return true;
10368
}
10469

10570

mascot/DataIOOps/DataIO.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ class CReadHelper
5050
string strFileName, int nNumofFeatures, int nNumofSamples);
5151
// Declaration only; the definition is not part of this view.
// NOTE(review): the parameter names suggest the samples are split into a positive and a
// negative container — confirm against the definition.
static void ReadMultiClassData(vector<vector<float_point> > &v_vPosSample, vector<vector<float_point> > &v_vNegSample,
5252
string strFileName, int nNumofFeatures, int nNumofSamples);
53+
// Declaration only; the definition is not part of this view.
// Called by CDataIOOps::ReadFromFile as (samples, labels, file name, feature count); that
// caller passes an int feature count, which is implicitly widened to the long parameter.
// NOTE(review): presumably parses a LibSVM-format file and fills v_vSamples / v_nLabels in
// step — confirm against the definition. Top-level const on the by-value parameters does
// not affect callers.
static void ReadLibSVMMultiClassData(vector<vector<float_point> > &v_vSamples, vector<int> &v_nLabels, const string strFileName,
54+
const long nNumofFeatures);
5355
};
5456

5557

0 commit comments

Comments
 (0)