-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtree.c
More file actions
361 lines (283 loc) · 9.53 KB
/
tree.c
File metadata and controls
361 lines (283 loc) · 9.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <assert.h>
#include "tree.h"
#include "FGK_functions.h"
// additional function to print the tree in inorder
void TreeShowFunction(Node root);
// among all nodes with the same occ, return the node pointer with the max label
Node FindNodeNumberMaxInBlock(Node root, int occ);
Node NodeCreate(int c, int label, int occ, Node left, Node right, Node parent){
// first create the node
Node n = (Node) malloc(sizeof(struct _Node));
assert(n != NULL);
// fill information
n->c = c;
n->label = label;
n->occ = occ;
n->left = left;
n->right = right;
n->parent = parent;
return n;
}
Node NodeDestroy(Node n){
// recursive delete the children first, then delete this node
if (n != NULL){
NodeDestroy(n->left);
NodeDestroy(n->right);
// now free
free(n);
n = NULL;
}
return n;
}
// recursion, upward trace until reach root
void NodePrintCode(Node n, FILE* fp_out, char* out_c, int* out_c_num){
assert(n != NULL);
// stop when reach the root
if (n->c != ROOT_C){
// if not root node yet, continue upwards
NodePrintCode(n->parent, fp_out, out_c, out_c_num);
// check left or right child
if (n == n->parent->left){
// left = 0, only print 1 bit
print_to_file(fp_out, out_c, out_c_num, 0, 1);
}
else{
// right = 1, only print 1 bit
print_to_file(fp_out, out_c, out_c_num, 1, 1);
}
}
return;
}
Tree TreeCreate(void){
Tree tr = (Tree) malloc(sizeof(struct _Tree));
assert(tr != NULL);
// create the root node, and initially this node is also the NYT
tr->root = NodeCreate(ROOT_C, LABEL_START, 0, NULL, NULL, NULL);
tr->NYT = tr->root;
return tr;
}
Tree TreeDestroy(Tree tr){
assert(tr != NULL);
NodeDestroy(tr->root);
free(tr);
tr = NULL;
return tr;
}
void TreeShow(Tree tr){
assert(tr != NULL);
TreeShowFunction(tr->root);
fprintf(stdout, "\n");
return;
}
// inorder traversal of the tree: left -> root -> right
void TreeShowFunction(Node n){
if (n != NULL){
// left
TreeShowFunction(n->left);
// middle root
if (n->c == ROOT_C){
fprintf(stdout, "(Root,%d,%d) ", n->label, n->occ);
}
else if (n->c == INTERNAL_NODE_C){
fprintf(stdout, "(Internal,%d,%d) ", n->label, n->occ);
}
else if(n->c == NYT_C){
fprintf(stdout, "(NYT,%d,%d) ", n->label, n->occ);
}
else{
fprintf(stdout, "(%c,%d,%d) ", n->c, n->label, n->occ);
}
// right
TreeShowFunction(n->right);
}
return;
}
void TreeUpdate(Tree tr, NodeList ndlist, int c, FILE* fp_out, char *out_c, int* out_c_num){
assert(tr != NULL);
assert(ndlist != NULL);
// first determine if the list contain the node or not
Node n = NodeListFindNode(ndlist, c);
if (n != NULL){
// not first occurrence for this letter
NodePrintCode(n, fp_out, out_c, out_c_num);
// node has been created before
TreeUpdateFunction(tr, ndlist, n);
}
else{
// first occurrence for this letter
// print the code for NYT
NodePrintCode(tr->NYT, fp_out, out_c, out_c_num);
// and also print the new char, the new char has 8 bit
print_to_file(fp_out, out_c, out_c_num, c, 8);
// n is empty, create the node first
// n->label = current NYT label - 1
// n->parent = current NYT
n = NodeCreate(c, tr->NYT->label-1, 1, NULL, NULL, tr->NYT);
NodeListAddNode(ndlist, n);
Node newNYT = NodeCreate(NYT_C, tr->NYT->label-2, 0, NULL, NULL, tr->NYT);
// rearrange the oldNYT connection parts
tr->NYT->left = newNYT;
tr->NYT->right = n;
tr->NYT->occ += 1;
// so right now, the separation of old NYT to two new nodes have been finished
// also the old NYT weight has been increased
// reassign the NYT
tr->NYT = newNYT;
// check if it is the root node
// if the old NYT is the root node, do nothing, do not need to update tree any further
if(tr->NYT->parent->c != ROOT_C){
tr->NYT->parent->c = INTERNAL_NODE_C;
// go to its parent node for further update
TreeUpdateFunction(tr, ndlist, tr->NYT->parent->parent);
}
}
return;
}
void TreeUpdateFunction(Tree tr, NodeList ndlist, Node n){
// tree and the node should not be null
// but nodelist can be null (when decompression)
assert(tr != NULL && n != NULL);
Node target;
while (n->c != ROOT_C){
// check if its node number is max in the block
// traversal the whole tree
target = FindNodeNumberMaxInBlock(tr->root, n->occ);
// there are 3 possible results:
// 1. target = n->parent, 2. target is null, 3. target is purely different
// if yes, ignore, if not do the swap
if (target == n->parent || target == NULL){
// increase weight
n->occ += 1;
// move to its parent level
n = n->parent;
}
else{
// swap: three scenario
// swap internal node with internal
// swap internal with leaf
// swap leaf with leaf
// so we need to notice that
// if simply break and re-construct from bottom up, is not enough
// also need to reconstrct from top to down
Node n_old_parent = n->parent;
Node target_old_parent = target->parent;
// first break from top to down
// the label may be not continuous, so need to use pointer comparison
if(n == n_old_parent->right){
// n is the right child
n_old_parent->right = target;
}
else{
// n is the left child
n_old_parent->left = target;
}
// same as the target side
if(target == target_old_parent->right){
target_old_parent->right = n;
}
else{
target_old_parent->left = n;
}
// the above are top-down approach, now finish the bottom up
n->parent = target_old_parent;
target->parent = n_old_parent;
// finally, swap the label
int tmp_label;
tmp_label = n->label;
n->label = target->label;
target->label = tmp_label;
// and remember to update the node list, for compression
if (ndlist != NULL){
if (n->c >= 0){
ndlist[n->c] = n;
}
if (target->c >= 0){
ndlist[target->c] = target;
}
}
// at last, increase the occ of n, and move to its parent
n->occ += 1;
n = n->parent;
}
}
// for the root node, only need to update the counter
n->occ += 1;
return;
}
Node FindNodeNumberMaxInBlock(Node root, int occ){
Node result = NULL;
// use the idea that: occ decreases when going deeper, same as the label
if (root != NULL){
if (root->occ == occ){
// does not need to go deeper
// since child's node number definitely smaller than root node number
result = root;
}
else if (root->occ < occ){
result = NULL;
}
else{
// now root->occ > occ
// go deeper
Node left_max = FindNodeNumberMaxInBlock(root->left, occ);
Node right_max = FindNodeNumberMaxInBlock(root->right, occ);
// if one solution is null or both is null
if(left_max == NULL && right_max == NULL){
result = NULL;
}
else if (left_max == NULL){
result = right_max;
}
else if (right_max == NULL){
result = left_max;
}
else{
// both are not null
// for the same occ, return the max label node
// the label number must be different
if (left_max->label > right_max->label){
result = left_max;
}
else if (left_max->label < right_max->label){
result = right_max;
}
else{
fprintf(stderr, "label number same error\n");
exit(EXIT_FAILURE);
}
}
}
}
return result;
}
NodeList NodeListCreate(void){
NodeList ndlist = (NodeList) malloc(ALPHABET_SIZE * sizeof(Node));
assert(ndlist != NULL);
// null every entry
for (int i = 0; i < ALPHABET_SIZE; i++){
ndlist[i] = NULL;
}
return ndlist;
}
NodeList NodeListDestroy(NodeList ndlist){
assert(ndlist != NULL);
// at the time to destroy ndlist, the tree should already be destroyed
// so no need to call NodeDestroy to all entries
free(ndlist);
ndlist = NULL;
return ndlist;
}
void NodeListAddNode(NodeList ndlist, Node n){
assert(ndlist != NULL);
assert(n != NULL && n->c != ROOT_C && n->c != INTERNAL_NODE_C);
ndlist[n->c] = n;
return;
}
Node NodeListFindNode(NodeList ndlist, int c){
assert(ndlist != NULL);
// if no node for that char, then return NULL
return ndlist[c];
}