Skip to content

Commit 1045d94

Browse files
chore: first model is now ready
1 parent 4742a48 commit 1045d94

21 files changed

Lines changed: 13687 additions & 4253 deletions

models/best_models/eng/best.pt

925 KB
Binary file not shown.
Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
2-
"epoch": 9,
3-
"train_loss": 1.9133409197266038,
4-
"validation_loss": 2.0183915266623864,
2+
"epoch": 30,
3+
"train_loss": 0.805634579261144,
4+
"validation_loss": 1.290153529047966,
55
"train_exact_match": null,
66
"validation_exact_match": null
77
}

models/datasets/eng/stats.json

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2,23 +2,23 @@
22
"language": "eng",
33
"corpusPath": "C:\\Users\\jorts\\order\\models\\tokenizers\\eng\\corpus.jsonl",
44
"modelPath": "C:\\Users\\jorts\\order\\models\\tokenizers\\eng\\tokenizer.model",
5-
"sampleCount": 57,
6-
"trainCount": 43,
7-
"validationCount": 14,
5+
"sampleCount": 100,
6+
"trainCount": 75,
7+
"validationCount": 25,
88
"inputLengths": {
9-
"count": 57,
10-
"min": 34,
11-
"max": 422,
12-
"avg": 198.47368421052633,
13-
"p50": 191,
14-
"p95": 334
9+
"count": 100,
10+
"min": 27,
11+
"max": 480,
12+
"avg": 184.92,
13+
"p50": 169,
14+
"p95": 376
1515
},
1616
"labelLengths": {
17-
"count": 57,
18-
"min": 127,
19-
"max": 1899,
20-
"avg": 655.6842105263158,
21-
"p50": 627,
22-
"p95": 1162
17+
"count": 100,
18+
"min": 59,
19+
"max": 1849,
20+
"avg": 601.2,
21+
"p50": 609,
22+
"p95": 1036
2323
}
2424
}

models/datasets/eng/train.jsonl

Lines changed: 75 additions & 43 deletions
Large diffs are not rendered by default.

models/datasets/eng/validation.jsonl

Lines changed: 25 additions & 14 deletions
Large diffs are not rendered by default.

models/onnx_builds/eng/config.json

Lines changed: 31 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,29 @@
11
{
22
"language": "eng",
3-
"format": "onnx-seq2seq-distribution",
3+
"format": "onnx-seq2seq-build",
44
"modelFile": "model.onnx",
55
"tokenizerModelFile": "tokenizer.model",
66
"tokenizerVocabFile": "tokenizer.vocab",
77
"metricsFile": "metrics.json",
8-
"inputNames": ["input_ids", "attention_mask", "decoder_input_ids"],
9-
"outputNames": ["logits"],
8+
"inputNames": [
9+
"input_ids",
10+
"attention_mask",
11+
"decoder_input_ids"
12+
],
13+
"outputNames": [
14+
"logits"
15+
],
1016
"tokenIds": {
1117
"bos": 1,
1218
"eos": 2,
13-
"pad": 665
19+
"pad": 4329
1420
},
1521
"limits": {
16-
"maxInputLength": 329,
17-
"maxDecoderLength": 1329
22+
"maxInputLength": 256,
23+
"maxDecoderLength": 513
1824
},
1925
"model": {
20-
"vocabSize": 665,
26+
"vocabSize": 4329,
2127
"dModel": 128,
2228
"numHeads": 4,
2329
"numEncoderLayers": 2,
@@ -27,11 +33,24 @@
2733
},
2834
"validation": {
2935
"dummy_input_shape": {
30-
"input_ids": [1, 16],
31-
"attention_mask": [1, 16],
32-
"decoder_input_ids": [1, 8]
36+
"input_ids": [
37+
1,
38+
16
39+
],
40+
"attention_mask": [
41+
1,
42+
16
43+
],
44+
"decoder_input_ids": [
45+
1,
46+
8
47+
]
3348
},
34-
"output_shape": [1, 8, 665],
35-
"max_abs_diff": 9.5367431640625e-7
49+
"output_shape": [
50+
1,
51+
8,
52+
4329
53+
],
54+
"max_abs_diff": 4.76837158203125e-06
3655
}
3756
}
Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
2-
"epoch": 1,
3-
"train_loss": 6.571187973022461,
4-
"validation_loss": 6.362093925476074,
2+
"epoch": 30,
3+
"train_loss": 0.805634579261144,
4+
"validation_loss": 1.290153529047966,
55
"train_exact_match": null,
66
"validation_exact_match": null
77
}

models/onnx_builds/eng/model.onnx

3.16 MB
Binary file not shown.
51.1 KB
Binary file not shown.

0 commit comments

Comments
 (0)