Skip to content

Commit 91417ff

Browse files
committed
text.c: Improve text rendering performance again by treating font sheets as combined
As it turns out, the system font texture sheets are all 128x32 pixels and adjacent in memory! We can reinterpret the memory starting at sheet 0 and describe a much bigger texture that encompasses all of the ASCII glyphs and make our cache use that instead of the individual sheets. This will massively improve performance by reducing texture swaps within a piece of text, down to 0 if it's all English. We don't need any extra linear allocating to do this! The coalescing will be applied to all characters / glyph sheets up until the last `glyphInfo.nSheets % 32` sheets. This means that there are more operations per glyph being done in `textGetGlyphPosFromCodePoint`, but this is probably offset by the savings from not switching textures as often. And, this won't matter for English text, which has these results cached.
1 parent 82a3a14 commit 91417ff

1 file changed

Lines changed: 64 additions & 20 deletions

File tree

source/text.c

Lines changed: 64 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,36 +1,84 @@
11
#include "text.h"
22

33
#define NUM_ASCII_CHARS 128
4+
#define SHEETS_PER_BIG_SHEET 32
45

56
static C3D_Tex* s_glyphSheets;
67
static float s_textScale;
78
static int s_textLang = CFG_LANGUAGE_EN;
9+
static uint32_t s_numSheetsThatWereCombined;
810
static charWidthInfo_s* s_asciiCacheCharWidth[NUM_ASCII_CHARS];
911
// @Note: Could use s_asciiCacheCharWidth to reimplement fontCalcGlyphPos, but it would cache slightly fewer computations.
1012
static fontGlyphPos_s s_asciiCacheGlyphPos[NUM_ASCII_CHARS];
1113

14+
static fontGlyphPos_s _textGetGlyphPosFromCodePoint(uint32_t code, uint32_t flags, float scaleX, float scaleY)
15+
{
16+
fontGlyphPos_s result;
17+
int glyphIdx = fontGlyphIndexFromCodePoint(NULL, code);
18+
fontCalcGlyphPos(&result, NULL, glyphIdx, flags, scaleX, scaleY);
19+
20+
if (result.sheetIndex < s_numSheetsThatWereCombined)
21+
{
22+
uint32_t indexWithinBigSheet = result.sheetIndex % SHEETS_PER_BIG_SHEET;
23+
result.sheetIndex /= SHEETS_PER_BIG_SHEET;
24+
25+
// Readjust glyph UVs to account for being a part of the combined texture.
26+
result.texcoord.top = (result.texcoord.top + (SHEETS_PER_BIG_SHEET - indexWithinBigSheet - 1)) / (float) SHEETS_PER_BIG_SHEET;
27+
result.texcoord.bottom = (result.texcoord.bottom + (SHEETS_PER_BIG_SHEET - indexWithinBigSheet - 1)) / (float) SHEETS_PER_BIG_SHEET;
28+
}
29+
else
30+
{
31+
result.sheetIndex -= s_numSheetsThatWereCombined * SHEETS_PER_BIG_SHEET;
32+
}
33+
34+
return result;
35+
}
36+
37+
/*
 * Fills in *tex so that it describes one glyph sheet of the font.
 *
 * tex:       texture descriptor to overwrite; every field set here is replaced.
 * data:      pointer stored as-is in tex->data (no copy is made).
 * glyphInfo: source of the sheet's format, byte size, width and height.
 *
 * The texture uses linear min/mag filtering and clamps to the edge on both
 * axes; border and lodParam are reset to 0.
 */
static void fillSheet(C3D_Tex *tex, void *data, TGLP_s *glyphInfo)
{
    // Geometry and pixel format come straight from the font's glyph info.
    tex->width  = glyphInfo->sheetWidth;
    tex->height = glyphInfo->sheetHeight;
    tex->size   = glyphInfo->sheetSize;
    tex->fmt    = glyphInfo->sheetFmt;

    // The descriptor points at the caller-supplied sheet memory.
    tex->data = data;

    // Sampling state: clamp on both axes, linear filter in both directions.
    tex->param = GPU_TEXTURE_WRAP_S(GPU_CLAMP_TO_EDGE)
               | GPU_TEXTURE_WRAP_T(GPU_CLAMP_TO_EDGE)
               | GPU_TEXTURE_MIN_FILTER(GPU_LINEAR)
               | GPU_TEXTURE_MAG_FILTER(GPU_LINEAR);
    tex->lodParam = 0;
    tex->border   = 0;
}
49+
1250
void textInit(void)
1351
{
1452
// Ensure the shared system font is mapped
1553
fontEnsureMapped();
1654

55+
CFNT_s* font = fontGetSystemFont();
1756
// Load the glyph texture sheets
18-
int i;
1957
TGLP_s* glyphInfo = fontGetGlyphInfo(NULL);
20-
s_glyphSheets = malloc(sizeof(C3D_Tex)*glyphInfo->nSheets);
58+
59+
// As it turns out, the system font texture sheets are all 128x32 pixels and adjacent in memory! We can reinterpret
60+
// the memory starting at sheet 0 and describe a much bigger texture that encompasses all of the ASCII glyphs and
61+
// make our cache use that instead of the individual sheets. This will massively improve performance by reducing
62+
// texture swaps within a piece of text, down to 0 if it's all English. We don't need any extra linear allocating to
63+
// do this!
64+
uint32_t numSheetsBig = glyphInfo->nSheets / SHEETS_PER_BIG_SHEET;
65+
uint32_t numSheetsSmall = glyphInfo->nSheets % SHEETS_PER_BIG_SHEET;
66+
uint32_t numSheetsTotal = numSheetsBig + numSheetsSmall;
67+
s_numSheetsThatWereCombined = glyphInfo->nSheets - numSheetsSmall;
68+
69+
s_glyphSheets = malloc(sizeof(C3D_Tex)*numSheetsTotal);
2170
s_textScale = 30.0f / glyphInfo->cellHeight;
22-
for (i = 0; i < glyphInfo->nSheets; i ++)
71+
for (uint32_t i = 0; i < numSheetsBig; i++)
2372
{
2473
C3D_Tex* tex = &s_glyphSheets[i];
25-
tex->data = fontGetGlyphSheetTex(NULL, i);
26-
tex->fmt = glyphInfo->sheetFmt;
27-
tex->size = glyphInfo->sheetSize;
28-
tex->width = glyphInfo->sheetWidth;
29-
tex->height = glyphInfo->sheetHeight;
30-
tex->param = GPU_TEXTURE_MAG_FILTER(GPU_LINEAR) | GPU_TEXTURE_MIN_FILTER(GPU_LINEAR)
31-
| GPU_TEXTURE_WRAP_S(GPU_CLAMP_TO_EDGE) | GPU_TEXTURE_WRAP_T(GPU_CLAMP_TO_EDGE);
32-
tex->border = 0;
33-
tex->lodParam = 0;
74+
fillSheet(tex, fontGetGlyphSheetTex(font, i * SHEETS_PER_BIG_SHEET), glyphInfo);
75+
tex->height = (uint16_t) (tex->height * SHEETS_PER_BIG_SHEET);
76+
tex->size = tex->size * SHEETS_PER_BIG_SHEET;
77+
}
78+
79+
for (uint32_t i = 0; i < numSheetsSmall; i++)
80+
{
81+
fillSheet(&s_glyphSheets[numSheetsBig + i], fontGetGlyphSheetTex(font, numSheetsBig * SHEETS_PER_BIG_SHEET + i), glyphInfo);
3482
}
3583

3684
// Cache up front the results of fontGetCharWidthInfo and fontCalcGlyphPos for ASCII characters, since these functions
@@ -39,7 +87,7 @@ void textInit(void)
3987
{
4088
int glyphIdx = fontGlyphIndexFromCodePoint(NULL, i);
4189
s_asciiCacheCharWidth[i] = fontGetCharWidthInfo(NULL, glyphIdx);
42-
fontCalcGlyphPos(&s_asciiCacheGlyphPos[i], NULL, glyphIdx, GLYPH_POS_CALC_VTXCOORD, 1, 1);
90+
s_asciiCacheGlyphPos[i] = _textGetGlyphPosFromCodePoint(i, GLYPH_POS_CALC_VTXCOORD, 1, 1);
4391
}
4492

4593
Result res = cfguInit();
@@ -104,11 +152,9 @@ charWidthInfo_s *textGetCharWidthFromCodePoint(uint32_t code)
104152

105153
fontGlyphPos_s textGetGlyphPosFromCodePoint(uint32_t code, uint32_t flags, float scaleX, float scaleY)
106154
{
107-
fontGlyphPos_s result;
108-
109155
if (code < NUM_ASCII_CHARS)
110156
{
111-
result = s_asciiCacheGlyphPos[code];
157+
fontGlyphPos_s result = s_asciiCacheGlyphPos[code];
112158

113159
if ((flags & GLYPH_POS_AT_BASELINE))
114160
{
@@ -124,14 +170,12 @@ fontGlyphPos_s textGetGlyphPosFromCodePoint(uint32_t code, uint32_t flags, float
124170
result.vtxcoord.right *= scaleX;
125171
result.vtxcoord.top *= scaleY;
126172
result.vtxcoord.bottom *= scaleY;
173+
return result;
127174
}
128175
else
129176
{
130-
int glyphIdx = fontGlyphIndexFromCodePoint(NULL, code);
131-
fontCalcGlyphPos(&result, NULL, glyphIdx, flags, scaleX, scaleY);
177+
return _textGetGlyphPosFromCodePoint(code, flags, scaleX, scaleY);
132178
}
133-
134-
return result;
135179
}
136180

137181
float textCalcWidth(const char* text)

0 commit comments

Comments
 (0)