Skip to content

Commit e16e551

Browse files
committed
Improve text rendering performance again by treating font sheets as combined
The way TGLP_s is set up, all of a font's texture sheets are adjacent in memory and have the same size. We can reinterpret the memory to describe a smaller set of much taller textures if we'd like. If we choose the right size, we can get all of the ASCII glyphs under a single texture, which will massively improve performance by reducing texture swaps within a piece of all-English text down to 0! We don't need any extra linear allocation to do this! The coalescing is applied to all characters / glyph sheets except the last `glyphInfo->nSheets % 32` sheets. This means that more operations are done per glyph in `C2D_FontCalcGlyphPos`, but this is probably offset by the savings from not switching textures as often. And this won't matter for English text, which has these results cached.
1 parent a91f28c commit e16e551

File tree

3 files changed

+57
-14
lines changed

3 files changed

+57
-14
lines changed

source/font.c

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include <c2d/font.h>
66

77
// Cache of system-font glyph positions; presumably one entry per ASCII code point (verify against the code that fills it).
// NOTE(review): the literal 128 must stay in sync with NUM_ASCII_CHARACTERS, which sizes the matching extern declaration in internal.h.
fontGlyphPos_s g_systemFontASCIICache[128];
8+
// Number of leading system-font glyph sheets that have been folded into combined textures.
// C2D_FontCalcGlyphPos remaps any sheet index below this value onto a combined texture;
// indices at or above it are only shifted. Zero until the text subsystem assigns it.
u32 g_numFontSheetsCombined;
89

910
C2D_Font C2D_FontLoad(const char* filename)
1011
{
@@ -245,9 +246,27 @@ charWidthInfo_s* C2D_FontGetCharWidthInfo(C2D_Font font, int glyphIndex)
245246
// Computes the glyph position data for glyphIndex and stores it in *out.
//
// font:      font to query; NULL selects the system font.
// out:       receives the result of fontCalcGlyphPos (then adjusted for the system font, see below).
// glyphIndex, flags, scaleX, scaleY: forwarded unchanged to fontCalcGlyphPos.
//
// For the system font, the sheet index and vertical texture coordinates are rewritten so that
// they address the combined glyph textures (SHEETS_PER_BIG_SHEET original sheets per texture).
void C2D_FontCalcGlyphPos(C2D_Font font, fontGlyphPos_s* out, int glyphIndex, u32 flags, float scaleX, float scaleY)
{
	// Custom fonts are passed straight through; no sheet remapping is applied to them.
	if (font)
	{
		fontCalcGlyphPos(out, font->cfnt, glyphIndex, flags, scaleX, scaleY);
		return;
	}

	fontCalcGlyphPos(out, fontGetSystemFont(), glyphIndex, flags, scaleX, scaleY);

	if (out->sheetIndex >= g_numFontSheetsCombined)
	{
		// This sheet was left uncombined. Uncombined sheets are numbered right after the
		// combined ones, so drop the merged sheet count and add the combined texture count.
		out->sheetIndex = out->sheetIndex - g_numFontSheetsCombined + g_numFontSheetsCombined / SHEETS_PER_BIG_SHEET;
		return;
	}

	// Position of the original sheet inside its combined texture, then the combined texture's index.
	u32 slotInBigSheet = out->sheetIndex % SHEETS_PER_BIG_SHEET;
	out->sheetIndex /= SHEETS_PER_BIG_SHEET;

	// Readjust the glyph's vertical UVs for the taller combined texture: shift them into the
	// band that belongs to this sheet, then rescale to the new height. Horizontal UVs are untouched.
	u32 bandOffset = SHEETS_PER_BIG_SHEET - slotInBigSheet - 1;
	out->texcoord.top = (out->texcoord.top + bandOffset) / (float) SHEETS_PER_BIG_SHEET;
	out->texcoord.bottom = (out->texcoord.bottom + bandOffset) / (float) SHEETS_PER_BIG_SHEET;
}
252271

253272
void C2D_FontCalcGlyphPosFromCodePoint(C2D_Font font, fontGlyphPos_s* out, u32 codepoint, u32 flags, float scaleX, float scaleY)

source/internal.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,3 +122,6 @@ void C2Di_Update(void);
122122

123123
// Element count of g_systemFontASCIICache (128, i.e. the size of the 7-bit ASCII range).
#define NUM_ASCII_CHARACTERS 128
124124
// Cache of system-font glyph positions with NUM_ASCII_CHARACTERS entries; defined in font.c.
extern fontGlyphPos_s g_systemFontASCIICache[NUM_ASCII_CHARACTERS];
125+
126+
// How many consecutive system-font glyph sheets are merged into one combined (taller) texture.
#define SHEETS_PER_BIG_SHEET 32
127+
// Count of leading system-font glyph sheets that were merged into combined textures; defined in font.c.
extern u32 g_numFontSheetsCombined;

source/text.c

Lines changed: 35 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,19 @@ static int C2Di_GlyphComp(const void* _g1, const void* _g2)
5757
return ret;
5858
}
5959

60+
// Fills in a texture descriptor so that it refers to glyph sheet pixels that already exist
// in memory; only the pointer is stored, no pixel data is copied.
//
// tex:       descriptor whose fields are overwritten.
// data:      address stored as the texture's pixel data.
// glyphInfo: supplies the sheet format, byte size, width and height.
static void fillSheet(C3D_Tex *tex, void *data, TGLP_s *glyphInfo)
{
	// Storage: where the pixels live and how they are encoded.
	tex->data = data;
	tex->size = glyphInfo->sheetSize;
	tex->fmt = glyphInfo->sheetFmt;

	// Dimensions of a single sheet; callers may enlarge these afterwards.
	tex->height = glyphInfo->sheetHeight;
	tex->width = glyphInfo->sheetWidth;

	// Sampling: linear filtering for both magnification and minification,
	// with coordinates clamped to the texture edge on both axes.
	tex->param = (GPU_TEXTURE_MAG_FILTER(GPU_LINEAR) | GPU_TEXTURE_MIN_FILTER(GPU_LINEAR))
		| (GPU_TEXTURE_WRAP_S(GPU_CLAMP_TO_EDGE) | GPU_TEXTURE_WRAP_T(GPU_CLAMP_TO_EDGE));

	// Remaining fields are cleared.
	tex->lodParam = 0;
	tex->border = 0;
}
72+
6073
static void C2Di_TextEnsureLoad(void)
6174
{
6275
// Skip if already loaded
@@ -70,24 +83,32 @@ static void C2Di_TextEnsureLoad(void)
7083
// Load the glyph texture sheets
7184
CFNT_s* font = fontGetSystemFont();
7285
TGLP_s* glyphInfo = fontGetGlyphInfo(font);
73-
s_glyphSheets = malloc(sizeof(C3D_Tex)*glyphInfo->nSheets);
74-
s_textScale = 30.0f / glyphInfo->cellHeight;
86+
87+
// The way TGLP_s is set up, all of a font's texture sheets are adjacent in memory and have the same size. We can
88+
// reinterpret the memory to describe a smaller set of much taller textures if we'd like. If we choose the right size,
89+
// we can get all of the ASCII glyphs under a single texture, which will massively improve performance by reducing
90+
// texture swaps within a piece of all-English text down to 0! We don't need any extra linear allocating to do this!
91+
u32 numSheetsBig = glyphInfo->nSheets / SHEETS_PER_BIG_SHEET;
92+
u32 numSheetsSmall = glyphInfo->nSheets % SHEETS_PER_BIG_SHEET;
93+
u32 numSheetsTotal = numSheetsBig + numSheetsSmall;
94+
g_numFontSheetsCombined = glyphInfo->nSheets - numSheetsSmall;
95+
96+
s_glyphSheets = malloc(sizeof(C3D_Tex)*numSheetsTotal);
7597
if (!s_glyphSheets)
7698
svcBreak(USERBREAK_PANIC);
77-
78-
int i;
79-
for (i = 0; i < glyphInfo->nSheets; i ++)
99+
memset(s_glyphSheets, 0, sizeof(sizeof(C3D_Tex)*numSheetsTotal));
100+
s_textScale = 30.0f / glyphInfo->cellHeight;
101+
for (u32 i = 0; i < numSheetsBig; i++)
80102
{
81103
C3D_Tex* tex = &s_glyphSheets[i];
82-
tex->data = fontGetGlyphSheetTex(font, i);
83-
tex->fmt = glyphInfo->sheetFmt;
84-
tex->size = glyphInfo->sheetSize;
85-
tex->width = glyphInfo->sheetWidth;
86-
tex->height = glyphInfo->sheetHeight;
87-
tex->param = GPU_TEXTURE_MAG_FILTER(GPU_LINEAR) | GPU_TEXTURE_MIN_FILTER(GPU_LINEAR)
88-
| GPU_TEXTURE_WRAP_S(GPU_CLAMP_TO_BORDER) | GPU_TEXTURE_WRAP_T(GPU_CLAMP_TO_BORDER);
89-
tex->border = 0;
90-
tex->lodParam = 0;
104+
fillSheet(tex, fontGetGlyphSheetTex(font, i * SHEETS_PER_BIG_SHEET), glyphInfo);
105+
tex->height = (uint16_t) (tex->height * SHEETS_PER_BIG_SHEET);
106+
tex->size = tex->size * SHEETS_PER_BIG_SHEET;
107+
}
108+
109+
for (u32 i = 0; i < numSheetsSmall; i++)
110+
{
111+
fillSheet(&s_glyphSheets[numSheetsBig + i], fontGetGlyphSheetTex(font, numSheetsBig * SHEETS_PER_BIG_SHEET + i), glyphInfo);
91112
}
92113

93114
// Initialize system font ASCII cache for C2D_FontCalcGlyphPosFromCodePoint

0 commit comments

Comments
 (0)