Skip to content

Commit 5cf8cd9

Browse files
committed
Add probability trees.
1 parent 7c33198 commit 5cf8cd9

2 files changed

Lines changed: 350 additions & 0 deletions

File tree

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,9 @@ It appears the optimal bit selection is:
9696

9797
NOTE: This is found empirically. It may not be correct or as-designed.
9898

99+
## More complicated symbols
100+
101+
You can use [probabilitytree.h](probabilitytree.h) to encode symbols. For instance, if you wanted to encode text, you could list out the frequencies of all your letters and call `ProbabilityTreeGenerateProbabilities`. This creates a table that can be used to encode or decode whole symbols at a time with `ProbabilityTreeReadSym` and `ProbabilityTreeWriteSym`.
99102

100103
## Overall Properties
101104

probabilitytree.h

Lines changed: 347 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,347 @@
1+
#ifndef _PROBABILITYTREE_H
2+
#define _PROBABILITYTREE_H
3+
4+
// Explanation of how to optimally store trees for storing elements that
5+
// require multiple bits when doing VPX coding.
6+
//
7+
// The encoder uses floating point math to allow for floating-point overall
8+
// probabilities. But, the decoding of these trees can be done on embedded
9+
// systems trivially.
10+
//
11+
// The tree implicitly stores its structure based on its index, so only the
12+
// probabilities need to be provided.
13+
//
14+
// Copyright 2025 Charles Lohr (cnlohr) under the MIT license. See end of file.
15+
//
16+
//
17+
// MSB is always at root of tree so you can lop off unused portions of the
18+
// tree.
19+
//
20+
// Tree:
21+
// L0 0
22+
// L1 1 128
23+
// L2 2 65 129 192
24+
// L3 3 34 66 97 130 161 193 224
25+
//
26+
// L0 0
27+
// L1 1 8
28+
// L2 2 5 9 12
29+
// L3 3 4 6 7 10 11 13 14
30+
//
31+
// This can be thought of in two ways: MSB down or LSB up.
32+
// For MSB Down
33+
// if( next MSB bit ) { place += 2^(total_bits-level) } else { place++ }
34+
// For LSB Up:
35+
// For each bit in max bits:
36+
// if( next LSB bit ) { place += 2^level } else { place++ }
37+
//
38+
// This way lower codes are always stored to the left side of the tree
39+
// so the right side can be lopped off.
40+
//
41+
42+
43+
44+
// You may want to tune the mult/shift values, I've seen some improvements in
45+
// slight adjustments.
46+
//
47+
// Scale factor applied to the computed chance-of-zero (a value in 0..1) when
// ProbabilityTreeGenerateProbabilities turns it into a probability byte.
// Define it before including this header to override the default.
#ifndef VPX_PROB_MULT
48+
#define VPX_PROB_MULT 257.0
49+
#endif
50+
51+
// Offset subtracted from the scaled chance-of-zero before it is truncated to
// an integer and clamped to 0..255. Define it before including this header to
// override the default.
#ifndef VPX_PROB_SHIFT
52+
#define VPX_PROB_SHIFT (-0.0)
53+
#endif
54+
55+
// Prefix placed in front of the function declarations and definitions in this
// header. Defaults to `static`; define it before including this header to
// substitute a different prefix.
#ifndef PROBABILITY_TREE_DECORATOR
56+
#define PROBABILITY_TREE_DECORATOR static
57+
#endif
58+
59+
// Forward declarations of the public entry points.

// Number of bits needed to represent the largest symbol value (elements - 1).
PROBABILITY_TREE_DECORATOR inline int ProbabilityTreeBitsForMaxElement( unsigned elements );
60+
// Number of probability entries needed for `elements` symbols of `needed_bits` bits each.
PROBABILITY_TREE_DECORATOR int ProbabilityTreeGetSize( unsigned elements, unsigned needed_bits );
61+
// Fills `probabilities` (at most nr_probabilities entries) from per-symbol `frequencies`.
PROBABILITY_TREE_DECORATOR void ProbabilityTreeGenerateProbabilities( uint8_t * probabilities, unsigned nr_probabilities, const float * frequencies, unsigned elements, unsigned needed_bits );
62+
// Decodes one whole symbol from `reader` using the probability table.
PROBABILITY_TREE_DECORATOR int ProbabilityTreeReadSym( vpx_reader * reader, uint8_t * probabilities, int num_probabilities, int bits_for_max_element );
63+
// Encodes one whole symbol `sym` to `writer` using the probability table.
// Returns 0 on success, or -1 if the walk would step outside the table.
PROBABILITY_TREE_DECORATOR int ProbabilityTreeWriteSym( vpx_writer * writer, int sym, uint8_t * probabilities, int num_probabilities, int bits_for_max_element );
64+
65+
66+
67+
// Used by below functions
68+
// Maps a node, identified by its depth (`level`, root = 0) and its left-to-right
// position within that depth (`placeinlevel`), to its index in the flattened
// probability table of a tree that is `totallevels` deep.
//
// The low `level` bits of `placeinlevel` describe the path from the root, most
// significant bit first: a 0 bit steps to the next table slot, a 1 bit skips
// over the 0-branch subtree.
PROBABILITY_TREE_DECORATOR int ProbabilityTreePlaceByLevelPlace( int level, int placeinlevel, int totallevels )
{
	int index = 0;
	int depth;
	for( depth = 0; depth < level; depth++ )
	{
		// Path bit consumed at this depth (MSB of the path first).
		int took_one_branch = ( placeinlevel >> ( level - 1 - depth ) ) & 1;
		if( took_one_branch )
		{
			index += 1 << ( totallevels - depth - 1 );
		}
		else
		{
			index += 1;
		}
	}
	return index;
}
83+
84+
// Returns the number of bits needed to represent the largest symbol value
// (elements - 1), i.e. the depth of a tree holding `elements` symbols:
//   elements == 1 -> 0,  2 -> 1,  3..4 -> 2,  5..8 -> 3, ...
// elements == 0 wraps to the all-ones value and reports the full width of
// `unsigned`.
PROBABILITY_TREE_DECORATOR inline int ProbabilityTreeBitsForMaxElement( unsigned elements )
{
	unsigned maxsym = elements - 1;

	// A single symbol needs no bits. This has to be handled before the
	// builtin below, because __builtin_clz( 0 ) has an undefined result.
	if( maxsym == 0 ) return 0;

#if (defined( __GNUC__ ) || defined( __clang__ ))
	return (int)( sizeof( unsigned ) * 8 ) - __builtin_clz( maxsym );
#else
	{
		// Portable fallback: count how many shifts it takes to clear the value.
		int bits = 0;
		while( maxsym )
		{
			bits++;
			maxsym >>= 1;
		}
		return bits;
	}
#endif
}
100+
101+
// Returns how many probability entries are needed to code `elements` distinct
// symbols (values 0 .. elements-1) with `needed_bits` bits per symbol.
//
// This walks the tree along the path of the largest symbol, MSB first, using
// the same stepping rule as the read/write functions (1 bit: skip the
// 0-branch subtree, 0 bit: advance by one). The place reached at the end is
// one past the last table index that path visits, which is the table length.
PROBABILITY_TREE_DECORATOR int ProbabilityTreeGetSize( unsigned elements, unsigned needed_bits )
{
	unsigned largest_symbol = elements - 1;
	unsigned table_len = 0;
	unsigned level;
	for( level = 0; level < needed_bits; level++ )
	{
		// Bit of the symbol consumed at this level; also the size of the
		// jump taken when that bit is set.
		unsigned step = 1u << ( needed_bits - level - 1 );
		table_len += ( largest_symbol & step ) ? step : 1u;
	}
	return (int)table_len;
}
118+
119+
float ProbabilityTreeComputeExpectedCost( float * fCounts, int nElements )
120+
{
121+
int n;
122+
float fRet = 0.0;
123+
float fSum = 0.0;
124+
125+
for( n = 0; n < nElements; n++ )
126+
{
127+
fSum += fCounts[n];
128+
}
129+
130+
for( n = 0; n < nElements; n++ )
131+
{
132+
float fC = fCounts[n];
133+
if( fC > 0 )
134+
{
135+
fRet += -log(fC / fSum)/log(2.0) * fC;
136+
}
137+
}
138+
return fRet;
139+
}
140+
141+
// OUTPUTS probabilities
142+
PROBABILITY_TREE_DECORATOR void ProbabilityTreeGenerateProbabilities( uint8_t * probabilities, unsigned nr_probabilities,
143+
const float * frequencies, unsigned elements, unsigned needed_bits )
144+
{
145+
int level;
146+
for( level = 0; level < needed_bits; level++ )
147+
{
148+
int maxmask = 1<<needed_bits;
149+
int levelmask = (0xffffffffULL >> (32 - level)) << (needed_bits-level); // i.e. 0xfc (number of bits that must match)
150+
int comparemask = 1<<(needed_bits-level-1); //i.e. 0x02 one fewer than the levelmask
151+
int lincmask = comparemask<<1;
152+
int maskcheck = 0;
153+
int placeinlevel = 0;
154+
for( maskcheck = 0; maskcheck < maxmask; maskcheck += lincmask )
155+
{
156+
float count1 = 0;
157+
float count0 = 0;
158+
int n;
159+
for( n = 0; n < (1<<needed_bits); n++ )
160+
{
161+
int tn = n;
162+
if( n >= elements ) continue;
163+
164+
if( ( tn & levelmask ) == (maskcheck) )
165+
{
166+
if( tn & comparemask )
167+
count1 += frequencies[n];
168+
else
169+
count0 += frequencies[n];
170+
}
171+
}
172+
double chanceof0 = count0 / (double)(count0 + count1);
173+
int prob = chanceof0 * VPX_PROB_MULT - VPX_PROB_SHIFT;
174+
if( prob < 0 ) prob = 0;
175+
if( prob > 255 ) prob = 255;
176+
int place = ProbabilityTreePlaceByLevelPlace( level, placeinlevel, needed_bits );
177+
if( place < nr_probabilities )
178+
probabilities[place] = prob;
179+
placeinlevel++;
180+
}
181+
}
182+
}
183+
184+
// Decodes one symbol of `bits_for_max_element` bits from `reader`, most
// significant bit first.
//
//   reader               - bit reader to pull coded bits from.
//   probabilities        - probability table for the tree.
//   num_probabilities    - number of entries in `probabilities`.
//   bits_for_max_element - bits per symbol (depth of the tree).
//
// Returns the decoded symbol, or -1 if the walk would need a table entry at
// or beyond num_probabilities.
//
// This is the function named by the forward declaration near the top of this
// header.
PROBABILITY_TREE_DECORATOR int ProbabilityTreeReadSym( vpx_reader * reader, uint8_t * probabilities, int num_probabilities, int bits_for_max_element )
{
	int probplace = 0;
	int ret = 0;
	int level;
	for( level = 0; level < bits_for_max_element; level++ )
	{
		// Weight of the bit decoded at this level; also the jump size that
		// skips the 0-branch subtree when the bit is 1.
		int step = 1 << ( bits_for_max_element - level - 1 );

		if( probplace >= num_probabilities ) return -1;
		uint8_t probability = probabilities[probplace];
		int bit = vpx_read( reader, probability );
		if( bit )
		{
			ret |= step;
			probplace += step;
		}
		else
		{
			probplace++;
		}
	}
	return ret;
}

// Original name of ProbabilityTreeReadSym, kept so existing callers keep working.
PROBABILITY_TREE_DECORATOR int ProbabilityTreeRead( vpx_reader * reader, uint8_t * probabilities, int num_probabilities, int bits_for_max_element )
{
	return ProbabilityTreeReadSym( reader, probabilities, num_probabilities, bits_for_max_element );
}
202+
203+
// Encodes `sym` to `writer` as `bits_for_max_element` bits, most significant
// bit first, coding each bit with the probability stored at the current node.
//
//   writer               - bit writer to push coded bits to.
//   sym                  - symbol value to encode.
//   probabilities        - probability table for the tree.
//   num_probabilities    - number of entries in `probabilities`.
//   bits_for_max_element - bits per symbol (depth of the tree).
//
// Returns 0 on success, or -1 if the walk would need a table entry at or
// beyond num_probabilities (bits written before that point stay written).
PROBABILITY_TREE_DECORATOR int ProbabilityTreeWriteSym( vpx_writer * writer, int sym, uint8_t * probabilities, int num_probabilities, int bits_for_max_element )
{
	int node = 0;
	int shift;
	// Walk from the top bit of the symbol down to bit 0.
	for( shift = bits_for_max_element - 1; shift >= 0; shift-- )
	{
		int step = 1 << shift;
		int bit = ( sym & step ) ? 1 : 0;

		if( node >= num_probabilities )
		{
			return -1;
		}

		uint8_t probability = probabilities[node];
		vpx_write( writer, bit, probability);

		// A 1 bit skips the whole 0-branch subtree; a 0 bit moves to the next slot.
		node += bit ? step : 1;
	}
	return 0;
}
221+
222+
/*
223+
224+
Notes:
225+
226+
Output:
227+
0
228+
1 8
229+
2 5 9 12
230+
3 4 6 7 10 11 13 14
231+
232+
0 0 0 0 0 0 0 0
233+
1 1 1 1 8 8 8 8
234+
2 2 5 5 9 9 12 12
235+
3 4 6 7 10 11 13 14
236+
237+
0 0 0 0 0 0 0 0
238+
1 8 1 8 1 8 1 8
239+
2 9 5 12 2 9 5 12
240+
3 10 6 13 4 11 7 14
241+
242+
243+
int main()
244+
{
245+
int tree_bits = 4;
246+
247+
int n;
248+
int l;
249+
for( l = 0; l < tree_bits; l++ )
250+
{
251+
for( n = 0; n < 1<<l; n++ )
252+
{
253+
printf( "%3d ", ProbabilityTreePlaceByLevelPlace( l, n, tree_bits ) );
254+
}
255+
printf( "\n" );
256+
}
257+
258+
printf( "\n" );
259+
// Computing, top-down (MSB first)
260+
// You would use this for ENCODING or DECODING a VPX Tree, when navigating downward.
261+
for( l = 0; l < tree_bits; l++ )
262+
{
263+
for( n = 0; n < 1<<(tree_bits-1); n++ )
264+
{
265+
int p = 0;
266+
int tl;
267+
268+
269+
// levelplace starts
270+
int levelplace = tree_bits-1;
271+
272+
// For each bit, pull off an MSB.
273+
for( tl = 0; tl < l; tl++ )
274+
{
275+
// msb here is not actually MSB but LSB, but when decoding you would use this, and you would "produce" MSB first.
276+
// This is the logic you would actually use. Pretend (1<<(treeplace-1)) is your own code.
277+
int msb = n & (1<<(levelplace-1));
278+
if( msb )
279+
p += 1<<levelplace;
280+
else
281+
p++;
282+
283+
levelplace--;
284+
}
285+
printf( "%3d", p );
286+
}
287+
printf( "\n" );
288+
}
289+
290+
printf( "\n" );
291+
// Computing, bottom-up (LSB first)
292+
// You will almost never need to do this.
293+
for( l = 0; l < tree_bits; l++ )
294+
{
295+
for( n = 0; n < 1<<(tree_bits-1); n++ )
296+
{
297+
int p = 0;
298+
int tl;
299+
300+
301+
// levelplace starts
302+
int levelplace = tree_bits-1;
303+
304+
// For each bit, pull off an LSB.
305+
for( tl = 0; tl < l; tl++ )
306+
{
307+
// msb here is not actually MSB but LSB, but when decoding you would use this, and you would "produce" MSB first.
308+
// This is the logic you would actually use. Pretend (1<<(treeplace-1)) is your own code.
309+
int lsb = (n >> tl) & 1;
310+
if( lsb )
311+
p += 1<<levelplace;
312+
else
313+
p++;
314+
315+
levelplace--;
316+
}
317+
printf( "%3d", p );
318+
}
319+
printf( "\n" );
320+
}
321+
322+
}
323+
324+
*/
325+
326+
/*
327+
Copyright 2025 <>< cnlohr (Charles Lohr)
328+
329+
Permission is hereby granted, free of charge, to any person obtaining a copy
330+
of this software and associated documentation files (the "Software"), to
331+
deal in the Software without restriction, including without limitation the
332+
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
333+
sell copies of the Software, and to permit persons to whom the Software is
334+
furnished to do so, subject to the following conditions:
335+
336+
The above copyright notice and this permission notice shall be included in
337+
all copies or substantial portions of the Software.
338+
339+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
340+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
341+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
342+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
343+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
344+
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
345+
DEALINGS IN THE SOFTWARE.
346+
*/
347+
#endif

0 commit comments

Comments
 (0)