Skip to content

Commit ca93338

Browse files
committed
text.c: Improve text rendering performance again by treating font sheets as combined
As it turns out, the system font texture sheets are all 128x32 pixels and adjacent in memory! We can reinterpret the memory starting at sheet 0 and describe a much bigger texture that encompasses all of the ASCII glyphs, and make our cache use that instead of the individual sheets. This will massively improve performance by reducing texture swaps within a piece of text, down to 0 if it's all English. We don't need any extra linear allocation to do this! The coalescing will be applied to all characters / glyph sheets up until the last `glyphInfo.nSheets % 32` sheets. This means that there are more operations per glyph being done in `textGetGlyphPosFromCodePoint`, but this is probably offset by the savings from not switching textures as often. And this won't matter for English text, which has these results cached.
1 parent 82a3a14 commit ca93338

1 file changed

Lines changed: 63 additions & 20 deletions

File tree

source/text.c

Lines changed: 63 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,36 +1,83 @@
11
#include "text.h"
22

33
#define NUM_ASCII_CHARS 128
4+
#define SHEETS_PER_BIG_SHEET 32
45

56
static C3D_Tex* s_glyphSheets;
67
static float s_textScale;
78
static int s_textLang = CFG_LANGUAGE_EN;
9+
static uint32_t s_numFontSheetsCombined;
810
static charWidthInfo_s* s_asciiCacheCharWidth[NUM_ASCII_CHARS];
911
// @Note: Could use s_asciiCacheCharWidth to reimplement fontCalcGlyphPos, but it would cache slightly fewer computations.
1012
static fontGlyphPos_s s_asciiCacheGlyphPos[NUM_ASCII_CHARS];
1113

14+
// Computes the glyph position for a code point and remaps it onto the combined-sheet layout
// that textInit stores in s_glyphSheets.
//
// code:           code point to look up.
// flags:          forwarded unchanged to fontCalcGlyphPos.
// scaleX, scaleY: forwarded unchanged to fontCalcGlyphPos.
//
// Returns the fontGlyphPos_s produced by fontCalcGlyphPos, with sheetIndex rewritten to index
// s_glyphSheets and, for glyphs on a combined sheet, texcoord.top / texcoord.bottom rescaled.
// NOTE(review): the NULL font argument presumably selects the system font -- confirm against libctru.
static fontGlyphPos_s _textGetGlyphPosFromCodePoint(uint32_t code, uint32_t flags, float scaleX, float scaleY)
15+
{
16+
fontGlyphPos_s result;
17+
int glyphIdx = fontGlyphIndexFromCodePoint(NULL, code);
18+
fontCalcGlyphPos(&result, NULL, glyphIdx, flags, scaleX, scaleY);
19+
20+
// Original sheets below s_numFontSheetsCombined were merged in groups of SHEETS_PER_BIG_SHEET.
if (result.sheetIndex < s_numFontSheetsCombined)
21+
{
22+
// Position of the original sheet inside its group; must be taken before sheetIndex is overwritten.
uint32_t indexWithinBigSheet = result.sheetIndex % SHEETS_PER_BIG_SHEET;
23+
// From here on sheetIndex refers to the combined sheet, not the original one.
result.sheetIndex /= SHEETS_PER_BIG_SHEET;
24+
25+
// Readjust glyph UVs to account for being a part of the combined texture.
// Each original sheet occupies a 1/SHEETS_PER_BIG_SHEET slice of the vertical range; the slice
// offset is (SHEETS_PER_BIG_SHEET - indexWithinBigSheet - 1), so the first sheet of a group gets
// the largest offset. Assumes the incoming top/bottom values are normalized to a single sheet --
// TODO confirm against fontCalcGlyphPos.
26+
result.texcoord.top = (result.texcoord.top + (SHEETS_PER_BIG_SHEET - indexWithinBigSheet - 1)) / (float) SHEETS_PER_BIG_SHEET;
27+
result.texcoord.bottom = (result.texcoord.bottom + (SHEETS_PER_BIG_SHEET - indexWithinBigSheet - 1)) / (float) SHEETS_PER_BIG_SHEET;
28+
}
29+
else
30+
{
31+
// Leftover sheets keep their original size and are stored after the combined sheets in
// s_glyphSheets, so shift the index by the number of combined sheets.
result.sheetIndex = result.sheetIndex - s_numFontSheetsCombined + s_numFontSheetsCombined / SHEETS_PER_BIG_SHEET;
32+
}
33+
34+
return result;
35+
}
36+
37+
// Fills in a C3D_Tex descriptor so that it refers to already-existing glyph sheet memory.
// Only the descriptor fields are written; this function neither allocates nor copies pixel data.
//
// tex:       descriptor to populate; every field assigned below is overwritten.
// data:      pointer stored as the texture's data.
// glyphInfo: source of the sheet format, byte size, width and height.
//
// The size and height written here describe a single sheet; textInit overrides both afterwards
// for the combined sheets.
static void fillSheet(C3D_Tex *tex, void *data, TGLP_s *glyphInfo)
38+
{
39+
tex->data = data;
40+
tex->fmt = glyphInfo->sheetFmt;
41+
tex->size = glyphInfo->sheetSize;
42+
tex->width = glyphInfo->sheetWidth;
43+
tex->height = glyphInfo->sheetHeight;
44+
// Linear filtering for both magnification and minification, clamp-to-edge wrapping on both axes.
tex->param = GPU_TEXTURE_MAG_FILTER(GPU_LINEAR) | GPU_TEXTURE_MIN_FILTER(GPU_LINEAR)
45+
| GPU_TEXTURE_WRAP_S(GPU_CLAMP_TO_EDGE) | GPU_TEXTURE_WRAP_T(GPU_CLAMP_TO_EDGE);
46+
tex->border = 0;
47+
tex->lodParam = 0;
48+
}
49+
1250
void textInit(void)
1351
{
1452
// Ensure the shared system font is mapped
1553
fontEnsureMapped();
1654

55+
CFNT_s* font = fontGetSystemFont();
1756
// Load the glyph texture sheets
18-
int i;
1957
TGLP_s* glyphInfo = fontGetGlyphInfo(NULL);
20-
s_glyphSheets = malloc(sizeof(C3D_Tex)*glyphInfo->nSheets);
58+
59+
// The way TGLP_s is set up, all of a font's texture sheets are adjacent in memory and have the same size. We can
60+
// reinterpret the memory to describe a smaller set of much taller textures if we'd like. If we choose the right size,
61+
// we can get all of the ASCII glyphs under a single texture, which will massively improve performance by reducing
62+
// texture swaps within a piece of all-English text down to 0! We don't need any extra linear allocating to do this!
63+
uint32_t numSheetsBig = glyphInfo->nSheets / SHEETS_PER_BIG_SHEET;
64+
uint32_t numSheetsSmall = glyphInfo->nSheets % SHEETS_PER_BIG_SHEET;
65+
uint32_t numSheetsTotal = numSheetsBig + numSheetsSmall;
66+
s_numFontSheetsCombined = glyphInfo->nSheets - numSheetsSmall;
67+
68+
s_glyphSheets = malloc(sizeof(C3D_Tex)*numSheetsTotal);
2169
s_textScale = 30.0f / glyphInfo->cellHeight;
22-
for (i = 0; i < glyphInfo->nSheets; i ++)
70+
for (uint32_t i = 0; i < numSheetsBig; i++)
2371
{
2472
C3D_Tex* tex = &s_glyphSheets[i];
25-
tex->data = fontGetGlyphSheetTex(NULL, i);
26-
tex->fmt = glyphInfo->sheetFmt;
27-
tex->size = glyphInfo->sheetSize;
28-
tex->width = glyphInfo->sheetWidth;
29-
tex->height = glyphInfo->sheetHeight;
30-
tex->param = GPU_TEXTURE_MAG_FILTER(GPU_LINEAR) | GPU_TEXTURE_MIN_FILTER(GPU_LINEAR)
31-
| GPU_TEXTURE_WRAP_S(GPU_CLAMP_TO_EDGE) | GPU_TEXTURE_WRAP_T(GPU_CLAMP_TO_EDGE);
32-
tex->border = 0;
33-
tex->lodParam = 0;
73+
fillSheet(tex, fontGetGlyphSheetTex(font, i * SHEETS_PER_BIG_SHEET), glyphInfo);
74+
tex->height = (uint16_t) (tex->height * SHEETS_PER_BIG_SHEET);
75+
tex->size = tex->size * SHEETS_PER_BIG_SHEET;
76+
}
77+
78+
for (uint32_t i = 0; i < numSheetsSmall; i++)
79+
{
80+
fillSheet(&s_glyphSheets[numSheetsBig + i], fontGetGlyphSheetTex(font, numSheetsBig * SHEETS_PER_BIG_SHEET + i), glyphInfo);
3481
}
3582

3683
// Cache up front the results of fontGetCharWidthInfo and fontCalcGlyphPos for ASCII characters, since these functions
@@ -39,7 +86,7 @@ void textInit(void)
3986
{
4087
int glyphIdx = fontGlyphIndexFromCodePoint(NULL, i);
4188
s_asciiCacheCharWidth[i] = fontGetCharWidthInfo(NULL, glyphIdx);
42-
fontCalcGlyphPos(&s_asciiCacheGlyphPos[i], NULL, glyphIdx, GLYPH_POS_CALC_VTXCOORD, 1, 1);
89+
s_asciiCacheGlyphPos[i] = _textGetGlyphPosFromCodePoint(i, GLYPH_POS_CALC_VTXCOORD, 1, 1);
4390
}
4491

4592
Result res = cfguInit();
@@ -104,11 +151,9 @@ charWidthInfo_s *textGetCharWidthFromCodePoint(uint32_t code)
104151

105152
fontGlyphPos_s textGetGlyphPosFromCodePoint(uint32_t code, uint32_t flags, float scaleX, float scaleY)
106153
{
107-
fontGlyphPos_s result;
108-
109154
if (code < NUM_ASCII_CHARS)
110155
{
111-
result = s_asciiCacheGlyphPos[code];
156+
fontGlyphPos_s result = s_asciiCacheGlyphPos[code];
112157

113158
if ((flags & GLYPH_POS_AT_BASELINE))
114159
{
@@ -124,14 +169,12 @@ fontGlyphPos_s textGetGlyphPosFromCodePoint(uint32_t code, uint32_t flags, float
124169
result.vtxcoord.right *= scaleX;
125170
result.vtxcoord.top *= scaleY;
126171
result.vtxcoord.bottom *= scaleY;
172+
return result;
127173
}
128174
else
129175
{
130-
int glyphIdx = fontGlyphIndexFromCodePoint(NULL, code);
131-
fontCalcGlyphPos(&result, NULL, glyphIdx, flags, scaleX, scaleY);
176+
return _textGetGlyphPosFromCodePoint(code, flags, scaleX, scaleY);
132177
}
133-
134-
return result;
135178
}
136179

137180
float textCalcWidth(const char* text)

0 commit comments

Comments
 (0)