Skip to content

Commit be3d9a1

Browse files
committed
Javadoc
1 parent ac35503 commit be3d9a1

34 files changed

Lines changed: 702 additions & 160 deletions

src/main/java/org/encog/ml/data/versatile/MatrixMLDataSet.java

Lines changed: 121 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,13 @@
1111
import org.encog.util.EngineArray;
1212

1313
/**
14+
* The MatrixMLDataSet can use a large 2D matrix of doubles to internally hold
15+
* data. It supports several advanced features such as the ability to mask and
16+
* time-box. Masking allows several datasets to use the same backing array,
17+
* while each uses a different part of it.
18+
*
19+
* Time boxing allows time-series data to be represented for prediction. The
20+
* following shows how data is laid out for different lag and lead settings.
1421
*
1522
* Lag 0; Lead 0 [10 rows] 1->1 2->2 3->3 4->4 5->5 6->6 7->7 8->8 9->9 10->10
1623
*
@@ -72,24 +79,63 @@ public final void remove() {
7279
}
7380
}
7481

82+
/**
83+
* The number of inputs.
84+
*/
7585
private int calculatedInputSize = -1;
86+
87+
/**
88+
* The number of ideal values.
89+
*/
7690
private int calculatedIdealSize = -1;
91+
92+
/**
93+
* The backing data.
94+
*/
7795
private double[][] data;
96+
97+
/**
98+
* The mask to the data.
99+
*/
78100
private int[] mask;
101+
102+
/**
103+
* The lag window size.
104+
*/
79105
private int lagWindowSize = 0;
106+
107+
/**
108+
* The lead window size.
109+
*/
80110
private int leadWindowSize = 0;
81111

112+
/**
113+
* The default constructor.
114+
*/
82115
public MatrixMLDataSet() {
83116

84117
}
85118

119+
/**
120+
* Construct the dataset with no mask.
121+
* @param theData The backing array.
122+
* @param theCalculatedInputSize The input size.
123+
* @param theCalculatedIdealSize The ideal size.
124+
*/
86125
public MatrixMLDataSet(double[][] theData, int theCalculatedInputSize,
87126
int theCalculatedIdealSize) {
88127
this.data = theData;
89128
this.calculatedInputSize = theCalculatedInputSize;
90129
this.calculatedIdealSize = theCalculatedIdealSize;
91130
}
92131

132+
/**
133+
* Construct the dataset from a 2D double array and a mask.
134+
* @param theData The data.
135+
* @param inputCount The input count.
136+
* @param idealCount The ideal count.
137+
* @param theMask The mask.
138+
*/
93139
public MatrixMLDataSet(double[][] theData, int inputCount, int idealCount,
94140
int[] theMask) {
95141
this.data = theData;
@@ -98,65 +144,95 @@ public MatrixMLDataSet(double[][] theData, int inputCount, int idealCount,
98144
this.mask = theMask;
99145
}
100146

147+
/**
148+
* Construct the dataset from another matrix dataset.
149+
* @param data The dataset that supplies the backing data.
150+
*            Its calculated input size is reused.
151+
*            Its calculated ideal size is reused.
152+
* @param mask The mask.
153+
*/
101154
public MatrixMLDataSet(MatrixMLDataSet data, int[] mask) {
102155
this.data = data.getData();
103156
this.calculatedInputSize = data.getCalculatedInputSize();
104157
this.calculatedIdealSize = data.getCalculatedIdealSize();
105158
this.mask = mask;
106159
}
107160

161+
/**
162+
* @return The mask.
163+
*/
108164
public int[] getMask() {
109165
return this.mask;
110166
}
111167

168+
/**
169+
* {@inheritDoc}
170+
*/
112171
@Override
113172
public Iterator<MLDataPair> iterator() {
114173
return new MatrixMLDataSetIterator();
115174
}
116175

176+
/**
177+
* {@inheritDoc}
178+
*/
117179
@Override
118180
public int getIdealSize() {
119181
return this.calculatedIdealSize * Math.min(this.leadWindowSize, 1);
120182
}
121183

184+
/**
185+
* {@inheritDoc}
186+
*/
122187
@Override
123188
public int getInputSize() {
124189
return this.calculatedInputSize * this.lagWindowSize;
125190
}
126191

192+
/**
193+
* {@inheritDoc}
194+
*/
127195
@Override
128196
public boolean isSupervised() {
129197
return getIdealSize() == 0;
130198
}
131199

200+
/**
201+
* {@inheritDoc}
202+
*/
132203
@Override
133204
public long getRecordCount() {
134-
if( this.data==null ) {
135-
throw new EncogError("You must normalize the dataset before using it.");
205+
if (this.data == null) {
206+
throw new EncogError(
207+
"You must normalize the dataset before using it.");
136208
}
137-
209+
138210
if (this.mask == null) {
139211
return this.data.length
140212
- (this.lagWindowSize + this.leadWindowSize);
141213
}
142214
return this.mask.length - (this.lagWindowSize + this.leadWindowSize);
143215
}
144-
216+
145217
private int calculateLagCount() {
146-
return (MatrixMLDataSet.this.lagWindowSize <= 0) ? 1: (this.lagWindowSize+1);
218+
return (MatrixMLDataSet.this.lagWindowSize <= 0) ? 1
219+
: (this.lagWindowSize + 1);
147220
}
148-
221+
149222
private int calculateLeadCount() {
150-
return (this.leadWindowSize <= 1) ? 1
151-
: this.leadWindowSize;
223+
return (this.leadWindowSize <= 1) ? 1 : this.leadWindowSize;
152224
}
153225

226+
/**
227+
* {@inheritDoc}
228+
*/
154229
@Override
155230
public void getRecord(long index, MLDataPair pair) {
156-
if( this.data==null ) {
157-
throw new EncogError("You must normalize the dataset before using it.");
231+
if (this.data == null) {
232+
throw new EncogError(
233+
"You must normalize the dataset before using it.");
158234
}
159-
235+
160236
// Copy the input, account for time windows.
161237
int inputSize = calculateLagCount();
162238
for (int i = 0; i < inputSize; i++) {
@@ -171,13 +247,19 @@ public void getRecord(long index, MLDataPair pair) {
171247
int outputStart = (this.leadWindowSize > 0) ? 1 : 0;
172248
int outputSize = calculateLeadCount();
173249
for (int i = 0; i < outputSize; i++) {
174-
double[] dataRow = lookupDataRow((int) (index + i+outputStart));
175-
EngineArray.arrayCopy(dataRow, this.calculatedInputSize, pair.getIdealArray(), i
176-
* MatrixMLDataSet.this.calculatedIdealSize,
250+
double[] dataRow = lookupDataRow((int) (index + i + outputStart));
251+
EngineArray.arrayCopy(dataRow, this.calculatedInputSize,
252+
pair.getIdealArray(), i
253+
* MatrixMLDataSet.this.calculatedIdealSize,
177254
MatrixMLDataSet.this.calculatedIdealSize);
178255
}
179256
}
180257

258+
/**
259+
* Find a row, using the mask.
260+
* @param index The index we seek.
261+
* @return The row.
262+
*/
181263
private double[] lookupDataRow(int index) {
182264
if (this.mask != null) {
183265
return this.data[this.mask[index]];
@@ -186,6 +268,9 @@ private double[] lookupDataRow(int index) {
186268
}
187269
}
188270

271+
/**
272+
* {@inheritDoc}
273+
*/
189274
@Override
190275
public MLDataSet openAdditional() {
191276
MatrixMLDataSet result = new MatrixMLDataSet(this.data,
@@ -195,47 +280,65 @@ public MLDataSet openAdditional() {
195280
return result;
196281
}
197282

283+
/**
284+
* {@inheritDoc}
285+
*/
198286
@Override
199287
public void add(MLData data1) {
200288
// TODO Auto-generated method stub
201289

202290
}
203291

292+
/**
293+
* {@inheritDoc}
294+
*/
204295
@Override
205296
public void add(MLData inputData, MLData idealData) {
206297
// TODO Auto-generated method stub
207298

208299
}
209300

301+
/**
302+
* {@inheritDoc}
303+
*/
210304
@Override
211305
public void add(MLDataPair inputData) {
212306
// TODO Auto-generated method stub
213307

214308
}
215309

310+
/**
311+
* {@inheritDoc}
312+
*/
216313
@Override
217314
public void close() {
218315
// TODO Auto-generated method stub
219316

220317
}
221318

319+
/**
320+
* {@inheritDoc}
321+
*/
222322
@Override
223323
public int size() {
224324
return (int) getRecordCount();
225325
}
226326

327+
/**
328+
* {@inheritDoc}
329+
*/
227330
@Override
228331
public MLDataPair get(int index) {
229-
if (index>size()) {
332+
if (index > size()) {
230333
return null;
231334
}
232335

233336
BasicMLData input = new BasicMLData(
234-
MatrixMLDataSet.this.calculatedInputSize*calculateLagCount());
337+
MatrixMLDataSet.this.calculatedInputSize * calculateLagCount());
235338
BasicMLData ideal = new BasicMLData(
236-
MatrixMLDataSet.this.calculatedIdealSize*calculateLeadCount());
339+
MatrixMLDataSet.this.calculatedIdealSize * calculateLeadCount());
237340
MLDataPair pair = new BasicMLDataPair(input, ideal);
238-
341+
239342
MatrixMLDataSet.this.getRecord(index, pair);
240343

241344
return pair;

src/main/java/org/encog/ml/data/versatile/NormalizationHelper.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,10 @@
88
import org.encog.EncogError;
99
import org.encog.ml.data.MLData;
1010
import org.encog.ml.data.basic.BasicMLData;
11+
import org.encog.ml.data.versatile.columns.ColumnDefinition;
12+
import org.encog.ml.data.versatile.columns.ColumnType;
1113
import org.encog.ml.data.versatile.missing.MissingHandler;
14+
import org.encog.ml.data.versatile.normalizers.strategies.NormalizationStrategy;
1215
import org.encog.util.csv.CSVFormat;
1316

1417
public class NormalizationHelper {
@@ -189,13 +192,13 @@ public int normalizeToVector(ColumnDefinition colDef, int outputColumn, double[]
189192
if( colDef.getDataType()==ColumnType.continuous) {
190193
double d = parseDouble(value);
191194
if( handler!=null ) {
192-
d = handler.process(colDef,d);
195+
d = handler.processDouble(colDef);
193196
}
194197
return this.normStrategy.normalizeColumn(colDef, isInput, d,
195198
output, outputColumn);
196199
} else {
197200
if( handler!=null ) {
198-
value = handler.process(colDef,value);
201+
value = handler.processString(colDef);
199202
}
200203
return this.normStrategy.normalizeColumn(colDef, isInput, value,
201204
output, outputColumn);

src/main/java/org/encog/ml/data/versatile/NormalizationStrategy.java

Lines changed: 0 additions & 20 deletions
This file was deleted.

src/main/java/org/encog/ml/data/versatile/VersatileDataSource.java

Lines changed: 0 additions & 7 deletions
This file was deleted.

src/main/java/org/encog/ml/data/versatile/VersatileMLDataSet.java

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,12 @@
44

55
import org.encog.EncogError;
66
import org.encog.mathutil.randomize.generate.GenerateRandom;
7+
import org.encog.ml.data.versatile.columns.ColumnDefinition;
8+
import org.encog.ml.data.versatile.columns.ColumnType;
9+
import org.encog.ml.data.versatile.division.DataDivision;
10+
import org.encog.ml.data.versatile.division.PerformDataDivision;
11+
import org.encog.ml.data.versatile.normalizers.strategies.NormalizationStrategy;
12+
import org.encog.ml.data.versatile.sources.VersatileDataSource;
713

814
public class VersatileMLDataSet extends MatrixMLDataSet {
915

@@ -88,7 +94,6 @@ public void normalize() {
8894
throw new EncogError("Please choose a model type first, with selectMethod.");
8995
}
9096

91-
int normalizedRows = strat.calculateTotalRows(this.analyzedRows);
9297
int normalizedInputColumns = this.helper
9398
.calculateNormalizedInputCount();
9499
int normalizedOutputColumns = this.helper
@@ -99,7 +104,7 @@ public void normalize() {
99104
setCalculatedIdealSize(normalizedOutputColumns);
100105
setCalculatedInputSize(normalizedInputColumns);
101106

102-
setData(new double[normalizedRows][normalizedColumns]);
107+
setData(new double[this.analyzedRows][normalizedColumns]);
103108

104109
this.source.rewind();
105110
String[] line;

0 commit comments

Comments
 (0)