Skip to content

Commit 4cfa323

Browse files
author
MPCoreDeveloper
committed
docs: Add string size calculator and table design guide
1 parent bb67b24 commit 4cfa323

File tree

1 file changed

+350
-2
lines changed

1 file changed

+350
-2
lines changed

docs/serialization/SERIALIZATION_AND_STORAGE_GUIDE.md

Lines changed: 350 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -843,7 +843,7 @@ var row = new Dictionary<string, object>
843843
// Create database with larger pages
844844
var options = new DatabaseOptions
845845
{
846-
PageSize = 8192, // 8 KB pages → supports larger records
846+
PageSize = 8192, // 8 KB pages → 8152 bytes available
847847
CreateImmediately = true,
848848
};
849849

@@ -1266,7 +1266,6 @@ var userWithRef = new Dictionary<string, object>
12661266

12671267

12681268
````````
1269-
12701269
This is the description of what the code block changes:
12711270
Add comprehensive LOB (Large Object) storage proposal as a future enhancement, explaining how it would work and why it's needed
12721271

@@ -1465,3 +1464,352 @@ var biography = lobTable.FindByLobId(lobId)["Data"];
14651464

14661465

14671466

1467+
1468+
````````
1469+
1470+
This is the description of what the code block changes:
1471+
Add practical string size calculator with formulas, examples, and API design for table creation
1472+
1473+
This is the code block that represents the suggested code change:
1474+
1475+
````````markdown
1476+
---
1477+
1478+
## 📏 String Size Calculator & Table Design Guide
1479+
1480+
### The Reality: Calculate Your Maximum String Size
1481+
1482+
When creating a table, you need to know: **Given all my columns, how large can a single string column be?**
1483+
1484+
#### Formula
1485+
1486+
```
1487+
MaxStringSize = (PageSize - HeaderSize - OtherColumnsSize - SerializationOverhead)
1488+
```
1489+
1490+
**Breaking it down:**
1491+
1492+
```csharp
1493+
// Step 1: Fixed overhead per record
1494+
int columnCount = 4;
1495+
int baseOverhead = sizeof(int); // ColumnCount: 4 bytes
1496+
1497+
// Step 2: Per-column overhead (for NON-string columns)
1498+
int userIdOverhead = 4 + "UserId".Length + 1 + sizeof(int); // NameLen(4) + Name(6) + Type(1) + Value(4) = 15
1499+
int emailOverhead = 4 + "Email".Length + 1; // NameLen(4) + Name(5) + Type(1) = 10
1500+
1501+
// Step 3: String column breakdown
1502+
// For a string, the formula is:
1503+
// NameLen(4) + ColumnName(N) + Type(1) + StringLen(4) + StringData(X)
1504+
int bioColumnNameLen = "Biography".Length; // 9 bytes
1505+
int bioOverhead = 4 + bioColumnNameLen + 1 + 4; // = 18 bytes
1506+
// Remaining space for string data:
1507+
int availableForBioData = MAX_PAGE_DATA_SIZE - baseOverhead - userIdOverhead - emailOverhead - bioOverhead;
1508+
1509+
// Example with 4KB page (4056 bytes available):
1510+
// 4056 - 4 - 15 - 10 - 18 = 4009 bytes available for Biography string!
1511+
```
1512+
1513+
### Practical Examples
1514+
1515+
#### Example 1: Small Records (4KB page)
1516+
1517+
```csharp
1518+
// Table schema:
1519+
// ┌─────────────────┬──────────┬────────┐
1520+
// │ Column │ Type │ Size │
1521+
// ├─────────────────┼──────────┼────────┤
1522+
// │ UserId │ Int32 │ 4 bytes│
1523+
// │ Email │ String │ 50 max │
1524+
// │ Name │ String │ 100 max│
1525+
// │ Bio │ String │ ??? max│
1526+
// └─────────────────┴──────────┴────────┘
1527+
1528+
var schema = new Dictionary<string, (string Type, int? MaxBytes)>
1529+
{
1530+
["UserId"] = ("Int32", 4),
1531+
["Email"] = ("String", 50), // Fixed max of 50 bytes
1532+
["Name"] = ("String", 100), // Fixed max of 100 bytes
1533+
["Bio"] = ("String", null), // Variable - calculate below
1534+
};
1535+
1536+
// Calculation:
1537+
int pageDataSize = 4056; // 4KB page - 40 byte header
1538+
int overhead = 0;
1539+
1540+
// Base: ColumnCount
1541+
overhead += 4;
1542+
1543+
// Column 1: UserId (Int32)
1544+
overhead += 4; // NameLen("UserId" = 6)
1545+
overhead += 6;
1546+
overhead += 1; // Type marker
1547+
overhead += 4; // Value
1548+
1549+
// Column 2: Email (String, max 50 bytes)
1550+
overhead += 4; // NameLen("Email" = 5)
1551+
overhead += 5;
1552+
overhead += 1; // Type marker
1553+
overhead += 4; // StringLen
1554+
overhead += 50; // Max string data
1555+
1556+
// Column 3: Name (String, max 100 bytes)
1557+
overhead += 4; // NameLen("Name" = 4)
1558+
overhead += 4;
1559+
overhead += 1; // Type marker
1560+
overhead += 4; // StringLen
1561+
overhead += 100;// Max string data
1562+
1563+
// Column 4: Bio (String, remaining)
1564+
overhead += 4; // NameLen("Bio" = 3)
1565+
overhead += 3;
1566+
overhead += 1; // Type marker
1567+
overhead += 4; // StringLen
1568+
1569+
// Available for Bio string:
1570+
int maxBioSize = pageDataSize - overhead; // = 4056 - 208 = 3848 bytes!
1571+
1572+
Console.WriteLine($"Max Bio string: {maxBioSize} bytes");
1573+
// Result: Bio can be up to 3848 bytes (3.8KB)
1574+
```
1575+
1576+
#### Example 2: Larger Records (8KB page)
1577+
1578+
```csharp
1579+
// Same schema, but with 8KB page (8152 bytes available):
1580+
int pageDataSize8KB = 8152;
1581+
int maxBioSize8KB = pageDataSize8KB - 208; // = 7944 bytes! (208 = overhead summed in Example 1)
1582+
1583+
Console.WriteLine($"Max Bio string (8KB page): {maxBioSize8KB} bytes");
1584+
// Result: Bio can be up to 7944 bytes (7.94KB)
1585+
```
1586+
1587+
#### Example 3: Complex Schema
1588+
1589+
```csharp
1590+
var complexSchema = new Dictionary<string, (string Type, int? MaxBytes)>
1591+
{
1592+
["Id"] = ("ULID", 26), // ULID as string: "01ARZ3NDEKTSV4RRFFQ69G5FAV" = 26 bytes
1593+
["CreatedAt"] = ("DateTime", 8),
1594+
["UpdatedAt"] = ("DateTime", 8),
1595+
["Status"] = ("String", 20), // enum: "ACTIVE", "INACTIVE", etc.
1596+
["JSON"] = ("String", null), // Variable - calculate!
1597+
};
1598+
1599+
// Calculation:
1600+
int baseOverhead = 4 + (4+2+1+26) + (4+9+1+8) + (4+9+1+8) + (4+6+1+4+20) + (4+4+1+4);
1601+
// = 4 + 33 + 22 + 22 + 35 + 13
1601+
// = 129 bytes
1602+
1603+
int maxJsonSize = 4056 - 129; // = 3927 bytes for JSON!
1605+
```
1606+
1607+
### Implementation: Add to Table Creation API
1608+
1609+
```csharp
1610+
// PROPOSAL: TableSchema with size validation
1611+
1612+
public class TableSchema
1613+
{
1614+
public int PageSize { get; set; }
1615+
public List<ColumnDefinition> Columns { get; set; }
1616+
1617+
/// <summary>
1618+
/// Validates that all records will fit within page size.
1619+
/// Returns: (maxStringSize for each string column, warnings)
1620+
/// </summary>
1621+
public TableSizeAnalysis AnalyzeSize()
1622+
{
1623+
int maxDataSize = PageSize - 40; // Header overhead
1624+
int fixedOverhead = CalculateFixedOverhead();
1625+
1626+
if (fixedOverhead >= maxDataSize)
1627+
{
1628+
throw new InvalidOperationException(
1629+
$"Table schema too large! Fixed overhead ({fixedOverhead}) " +
1630+
$"exceeds page data size ({maxDataSize})");
1631+
}
1632+
1633+
return new TableSizeAnalysis
1634+
{
1635+
PageSize = PageSize,
1636+
FixedOverhead = fixedOverhead,
1637+
AvailableForStrings = maxDataSize - fixedOverhead,
1638+
StringColumnLimits = CalculateStringLimits(),
1639+
};
1640+
}
1641+
}
1642+
1643+
public class TableSizeAnalysis
1644+
{
1645+
public int PageSize { get; set; }
1646+
public int FixedOverhead { get; set; }
1647+
public int AvailableForStrings { get; set; }
1648+
public Dictionary<string, int> StringColumnLimits { get; set; } // Column name → max bytes
1649+
}
1650+
1651+
// USAGE:
1652+
var schema = new TableSchema
1653+
{
1654+
PageSize = 4096,
1655+
Columns = new List<ColumnDefinition>
1656+
{
1657+
new("UserId", "Int32"),
1658+
new("Email", "String", maxLength: 50),
1659+
new("Name", "String", maxLength: 100),
1660+
new("Bio", "String"), // No max - will be calculated
1661+
}
1662+
};
1663+
1664+
var analysis = schema.AnalyzeSize();
1665+
Console.WriteLine($"Page size: {analysis.PageSize} bytes");
1666+
Console.WriteLine($"Fixed overhead: {analysis.FixedOverhead} bytes");
1667+
Console.WriteLine($"Available for strings: {analysis.AvailableForStrings} bytes");
1668+
Console.WriteLine();
1669+
foreach (var col in analysis.StringColumnLimits)
1670+
{
1671+
Console.WriteLine($"{col.Key}: max {col.Value} bytes");
1672+
}
1673+
1674+
// Output:
1675+
// Page size: 4096 bytes
1676+
// Fixed overhead: 58 bytes
1677+
// Available for strings: 3998 bytes
1678+
//
1679+
// Email: max 50 bytes
1680+
// Name: max 100 bytes
1681+
// Bio: max 3848 bytes (remaining)
1682+
```
1683+
1684+
### Practical Decision Tree
1685+
1686+
When designing your table:
1687+
1688+
```
1689+
Do you have large strings?
1690+
1691+
├─ NO (all < 1KB)
1692+
│ └─ Use 4KB page (default) ✅
1693+
1694+
├─ YES, 1-5KB strings
1695+
│ └─ Use 8KB page
1696+
1697+
├─ YES, 5-50KB strings
1698+
│ └─ Use 16KB page (fits strings up to ~16KB) OR split into multiple records
1699+
1700+
└─ YES, > 50KB strings
1701+
└─ Use external storage (Phase 5 LOB feature)
1702+
OR split into multiple records
1703+
```
1704+
1705+
### Best Practices
1706+
1707+
**1. Always Calculate BEFORE Creating Table**
1708+
1709+
```csharp
1710+
// BAD: Create table, then discover strings don't fit
1711+
var db = new SharpCoreDB();
1712+
var usersTable = db.CreateTable("Users");
1713+
1714+
// GOOD: Calculate first, then create
1715+
var analysis = new TableSchema { ... }.AnalyzeSize();
1716+
if (analysis.AvailableForStrings < expectedMaxStringSize)
1717+
{
1718+
// Use larger page size
1719+
}
1720+
```
1721+
1722+
**2. Document Your Schema**
1723+
1724+
```csharp
1725+
// Document the size constraints
1726+
public class UserRecord
1727+
{
1728+
public int UserId { get; set; }
1729+
1730+
/// <summary>
1731+
/// Email address. Max 50 bytes (typically 40-50 bytes for realistic emails).
1732+
/// </summary>
1733+
public string Email { get; set; }
1734+
1735+
/// <summary>
1736+
/// Full name. Max 100 bytes (typically 30-80 bytes for realistic names).
1737+
/// </summary>
1738+
public string Name { get; set; }
1739+
1740+
/// <summary>
1741+
/// Biography text. Max 3848 bytes (based on 4KB page with other columns).
1742+
/// If you need larger biographies, use external storage or increase page size to 8KB (7944 bytes).
1743+
/// </summary>
1744+
public string Bio { get; set; }
1745+
}
1746+
```
1747+
1748+
**3. Add Validation**
1749+
1750+
```csharp
1751+
// Validate before insert
1752+
public class User
1753+
{
1754+
private const int MaxBioBytes = 3848;
1755+
1756+
public void ValidateForInsert()
1757+
{
1758+
int bioBytes = Encoding.UTF8.GetByteCount(Bio ?? "");
1759+
if (bioBytes > MaxBioBytes)
1760+
{
1761+
throw new ArgumentException(
1762+
$"Bio exceeds max size: {bioBytes} > {MaxBioBytes} bytes");
1763+
}
1764+
}
1765+
}
1766+
```
1767+
1768+
**4. Test Edge Cases**
1769+
1770+
```csharp
1771+
[Fact]
1772+
public void InsertRecord_WithMaxSizeString_Should_Succeed()
1773+
{
1774+
var row = new Dictionary<string, object>
1775+
{
1776+
["UserId"] = 1,
1777+
["Email"] = "test@example.com",
1778+
["Name"] = "John Doe",
1779+
["Bio"] = new string('X', 3848), // Max size
1780+
};
1781+
1782+
// Should succeed
1783+
usersTable.Insert(row);
1784+
}
1785+
1786+
[Fact]
1787+
public void InsertRecord_WithOversizeString_Should_Throw()
1788+
{
1789+
var row = new Dictionary<string, object>
1790+
{
1791+
["UserId"] = 1,
1792+
["Email"] = "test@example.com",
1793+
["Name"] = "John Doe",
1794+
["Bio"] = new string('X', 3849), // One byte over!
1795+
};
1796+
1797+
// Should throw InvalidOperationException
1798+
Assert.Throws<InvalidOperationException>(() => usersTable.Insert(row));
1799+
}
1800+
```
1801+
1802+
---
1803+
1804+
1805+
1806+
1807+
1808+
1809+
1810+
1811+
1812+
1813+
1814+
1815+

0 commit comments

Comments
 (0)