-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathutils.js
More file actions
74 lines (69 loc) · 1.78 KB
/
utils.js
File metadata and controls
74 lines (69 loc) · 1.78 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
/**
 * Turn raw text into one-hot encoded training pairs using a sliding window.
 *
 * The text is split into UTF-16 code units. For every window start, the
 * `seqLength` units inside the window become one input sample and the unit
 * immediately after the window becomes the matching target.
 *
 * @param {string} text - Source text to encode.
 * @param {number} seqLength - Number of characters per input window.
 * @returns {Array} `[dataX, dataY, vocab, indVocab]`: `dataX[k]` is an array
 *   of one-hot vectors for window `k`, `dataY[k]` is the one-hot vector of the
 *   character following that window, and `vocab` / `indVocab` are the
 *   mappings returned by `getVocab`.
 */
function prepareData(text, seqLength) {
  const chars = text.split("");
  const [vocab, indVocab] = getVocab(chars);

  // Encode a single character against the vocabulary built above.
  const encode = (ch) => oneHot(vocab.size, vocab.get(ch));

  const dataX = [];
  const dataY = [];
  const windowCount = chars.length - seqLength;
  for (let start = 0; start < windowCount; start += 1) {
    const end = start + seqLength;
    dataX.push(chars.slice(start, end).map((ch) => encode(ch)));
    // The target is the character that immediately follows the window.
    dataY.push(encode(chars[end]));
  }

  return [dataX, dataY, vocab, indVocab];
}
/**
 * Build a one-hot vector: an array of `size` zeros with a single 1 at `at`.
 *
 * If `at` is not a number equal to one of the indices `0 .. size - 1`
 * (for example `undefined` for a character missing from the vocabulary),
 * the result is all zeros.
 *
 * @param {number} size - Length of the vector to build.
 * @param {number|undefined} at - Index that should be set to 1.
 * @returns {number[]} The one-hot encoded vector.
 */
function oneHot(size, at) {
  const vector = [];
  for (let i = 0; i < size; i++) {
    // Strict comparison: with `==`, falsy non-numbers such as "" or false
    // would coerce to 0 and wrongly light up the first slot.
    vector.push(at === i ? 1 : 0);
  }
  return vector;
}
/**
 * One-hot encode every character of a string against a vocabulary.
 *
 * @param {string} text - Text to encode, read one UTF-16 code unit at a time.
 * @param {Map<string, number>} vocab - Character-to-index map; its `size`
 *   determines the length of each vector.
 * @returns {number[][]} One one-hot vector per character, in text order.
 */
function oneHotString(text, vocab) {
  return Array.from({ length: text.length }, (_, pos) =>
    oneHot(vocab.size, vocab.get(text.charAt(pos)))
  );
}
/**
 * Convert a batch of per-character score rows back into a string.
 *
 * For each row index along `data.shape[0]`, the row is sliced out, reshaped
 * to a flat vector of `vocab.length` entries, and the position of its
 * largest entry is used to look up a letter in `vocab`.
 *
 * @param {object} data - Tensor-like object exposing `shape`, `slice`,
 *   `reshape`, `argMax` and an async `data()` reader (presumably a
 *   TensorFlow.js tensor - TODO confirm against the caller).
 * @param {string[]} vocab - Index-to-letter lookup list.
 * @returns {Promise<string>} The decoded letters joined together.
 */
async function decodeOutput(data, vocab) {
  const letters = [];
  let row = 0;
  while (row < data.shape[0]) {
    // NOTE(review): the intermediate slice/reshape/argMax results are never
    // released; if these are TensorFlow.js tensors this may leak - confirm.
    const winner = await data
      .slice(row, 1)
      .reshape([vocab.length])
      .argMax()
      .data();
    letters.push(vocab[winner[0]]);
    row += 1;
  }
  return letters.join("");
}
/**
 * Build a vocabulary from a list of tokens, ranked by how often each occurs.
 *
 * @param {Iterable<string>} arr - Tokens (e.g. single characters) to index.
 * @returns {Array} `[vocab, indVocab]`: `vocab` is a Map from token to its
 *   rank (0 = most frequent) and `indVocab` is the array of tokens in that
 *   same rank order, so `indVocab[vocab.get(t)] === t`.
 */
function getVocab(arr) {
  // Tally how many times each token occurs.
  const tally = new Map();
  for (const token of arr) {
    const seen = tally.has(token) ? tally.get(token) : 0;
    tally.set(token, seen + 1);
  }

  // Order tokens from most to least frequent; a token's position in this
  // ordering becomes its vocabulary index.
  const ranked = [...tally.entries()].sort(
    ([, countA], [, countB]) => countB - countA
  );

  const indVocab = ranked.map(([token]) => token);
  const vocab = new Map(ranked.map(([token], rank) => [token, rank]));
  return [vocab, indVocab];
}