@@ -843,7 +843,7 @@ var row = new Dictionary<string, object>
843843// Create database with larger pages
844844var options = new DatabaseOptions
845845{
846- PageSize = 8192 , // 8 KB pages → supports larger records
846+ PageSize = 8192 , // 8 KB pages → 8152 bytes available
847847 CreateImmediately = true ,
848848};
849849
@@ -1266,7 +1266,6 @@ var userWithRef = new Dictionary<string, object>
12661266
12671267
12681268````````
1269-
12701269This is the description of what the code block changes:
12711270Add comprehensive LOB (Large Object) storage proposal as a future enhancement, explaining how it would work and why it's needed
12721271
@@ -1465,3 +1464,352 @@ var biography = lobTable.FindByLobId(lobId)["Data"];
14651464
14661465
14671466
1467+
1468+ ````````
1469+
1470+ This is the description of what the code block changes:
1471+ Add practical string size calculator with formulas, examples, and API design for table creation
1472+
1473+ This is the code block that represents the suggested code change:
1474+
1475+ ````````markdown
1476+ ---
1477+
1478+ ## 📏 String Size Calculator & Table Design Guide
1479+
1480+ ### The Reality: Calculate Your Maximum String Size
1481+
1482+ When creating a table, you need to know: **Given all my columns, how large can a single string column be?**
1483+
1484+ #### Formula
1485+
1486+ ```
1487+ MaxStringSize = (PageSize - HeaderSize - OtherColumnsSize - SerializationOverhead)
1488+ ```
1489+
1490+ **Breaking it down:**
1491+
1492+ ```csharp
1493+ // Step 1: Fixed overhead per record
1494+ int columnCount = 4;
1495+ int baseOverhead = sizeof(int); // ColumnCount: 4 bytes
1496+
1497+ // Step 2: Per-column overhead (for NON-string columns)
1498+ int userIdOverhead = 4 + "UserId".Length + 1 + sizeof(int); // NameLen(4) + Name(6) + Type(1) + Value(4) = 15
1499+ int emailOverhead = 4 + "Email".Length + 1; // NameLen(4) + Name(5) + Type(1) = 10 (header only; value bytes not counted)
1500+
1501+ // Step 3: String column breakdown
1502+ // For a string, the formula is:
1503+ // NameLen(4) + ColumnName(N) + Type(1) + StringLen(4) + StringData(X)
1504+ int bioColumnNameLen = "Biography".Length; // 9 bytes
1505+ int bioOverhead = 4 + bioColumnNameLen + 1 + 4; // = 18 bytes
1506+ // Remaining space for string data:
1507+ int availableForBioData = MAX_PAGE_DATA_SIZE - baseOverhead - userIdOverhead - emailOverhead - bioOverhead;
1508+
1509+ // Example with 4KB page (4056 bytes available):
1510+ // 4056 - 4 - 15 - 10 - 18 = 4009 bytes available for Biography string!
1511+ ```
1512+
1513+ ### Practical Examples
1514+
1515+ #### Example 1: Small Records (4KB page)
1516+
1517+ ```csharp
1518+ // Table schema:
1519+ // ┌─────────────────┬──────────┬────────┐
1520+ // │ Column │ Type │ Size │
1521+ // ├─────────────────┼──────────┼────────┤
1522+ // │ UserId │ Int32 │ 4 bytes│
1523+ // │ Email │ String │ 50 max │
1524+ // │ Name │ String │ 100 max│
1525+ // │ Bio │ String │ ??? max│
1526+ // └─────────────────┴──────────┴────────┘
1527+
1528+ var schema = new Dictionary<string, (string Type, int? MaxBytes)>
1529+ {
1530+ ["UserId"] = ("Int32", 4),
1531+ ["Email"] = ("String", 50), // Fixed max of 50 bytes
1532+ ["Name"] = ("String", 100), // Fixed max of 100 bytes
1533+ ["Bio"] = ("String", null), // Variable - calculate below
1534+ };
1535+
1536+ // Calculation:
1537+ int pageDataSize = 4056; // 4KB page - 40 byte header
1538+ int overhead = 0;
1539+
1540+ // Base: ColumnCount
1541+ overhead += 4;
1542+
1543+ // Column 1: UserId (Int32)
1544+ overhead += 4; // NameLen("UserId" = 6)
1545+ overhead += 6;
1546+ overhead += 1; // Type marker
1547+ overhead += 4; // Value
1548+
1549+ // Column 2: Email (String, max 50 bytes)
1550+ overhead += 4; // NameLen("Email" = 5)
1551+ overhead += 5;
1552+ overhead += 1; // Type marker
1553+ overhead += 4; // StringLen
1554+ overhead += 50; // Max string data
1555+
1556+ // Column 3: Name (String, max 100 bytes)
1557+ overhead += 4; // NameLen("Name" = 4)
1558+ overhead += 4;
1559+ overhead += 1; // Type marker
1560+ overhead += 4; // StringLen
1561+ overhead += 100;// Max string data
1562+
1563+ // Column 4: Bio (String, remaining)
1564+ overhead += 4; // NameLen("Bio" = 3)
1565+ overhead += 3;
1566+ overhead += 1; // Type marker
1567+ overhead += 4; // StringLen
1568+
1569+ // Available for Bio string:
1570+ int maxBioSize = pageDataSize - overhead; // = 4056 - 208 = 3848 bytes! (overhead = 4 + 15 + 64 + 113 + 12)
1571+
1572+ Console.WriteLine($"Max Bio string: {maxBioSize} bytes");
1573+ // Result: Bio can be up to 3848 bytes (~3.8KB)
1574+ ```
1575+
1576+ #### Example 2: Larger Records (8KB page)
1577+
1578+ ```csharp
1579+ // Same schema, but with 8KB page (8152 bytes available):
1580+ int pageDataSize8KB = 8152;
1581+ int maxBioSize8KB = pageDataSize8KB - 208; // = 7944 bytes! (same 208-byte overhead as Example 1)
1582+
1583+ Console.WriteLine($"Max Bio string (8KB page): {maxBioSize8KB} bytes");
1584+ // Result: Bio can be up to 7944 bytes (~7.9KB)
1585+ ```
1586+
1587+ #### Example 3: Complex Schema
1588+
1589+ ```csharp
1590+ var complexSchema = new Dictionary<string, (string Type, int? MaxBytes)>
1591+ {
1592+ ["Id"] = ("ULID", 26), // ULID as string: "01ARZ3NDEKTSV4RRFFQ69G5FAV" = 26 bytes
1593+ ["CreatedAt"] = ("DateTime", 8),
1594+ ["UpdatedAt"] = ("DateTime", 8),
1595+ ["Status"] = ("String", 20), // enum: "ACTIVE", "INACTIVE", etc.
1596+ ["JSON"] = ("String", null), // Variable - calculate!
1597+ };
1598+
1599+ // Calculation:
1600+ int baseOverhead = 4 + (4+2+1+26) + (4+9+1+8) + (4+9+1+8) + (4+6+1+4+20) + (4+4+1+4);
1601+ // = 4 + 33 + 22 + 22 + 35 + 13
1602+ // = 129 bytes
1603+
1604+ int maxJsonSize = 4056 - 129; // = 3927 bytes for JSON!
1605+ ```
1606+
1607+ ### Implementation: Add to Table Creation API
1608+
1609+ ```csharp
1610+ // PROPOSAL: TableSchema with size validation
1611+
1612+ public class TableSchema
1613+ {
1614+ public int PageSize { get; set; }
1615+ public List<ColumnDefinition> Columns { get; set; }
1616+
1617+ /// <summary>
1618+ /// Validates that all records will fit within page size.
1619+ /// Returns: (maxStringSize for each string column, warnings)
1620+ /// </summary>
1621+ public TableSizeAnalysis AnalyzeSize()
1622+ {
1623+ int maxDataSize = PageSize - 40; // Header overhead
1624+ int fixedOverhead = CalculateFixedOverhead();
1625+
1626+ if (fixedOverhead >= maxDataSize)
1627+ {
1628+ throw new InvalidOperationException(
1629+ $"Table schema too large! Fixed overhead ({fixedOverhead}) " +
1630+ $"exceeds page data size ({maxDataSize})");
1631+ }
1632+
1633+ return new TableSizeAnalysis
1634+ {
1635+ PageSize = PageSize,
1636+ FixedOverhead = fixedOverhead,
1637+ AvailableForStrings = maxDataSize - fixedOverhead,
1638+ StringColumnLimits = CalculateStringLimits(),
1639+ };
1640+ }
1641+ }
1642+
1643+ public class TableSizeAnalysis
1644+ {
1645+ public int PageSize { get; set; }
1646+ public int FixedOverhead { get; set; }
1647+ public int AvailableForStrings { get; set; }
1648+ public Dictionary<string, int> StringColumnLimits { get; set; } // Column name → max bytes
1649+ }
1650+
1651+ // USAGE:
1652+ var schema = new TableSchema
1653+ {
1654+ PageSize = 4096,
1655+ Columns = new List<ColumnDefinition>
1656+ {
1657+ new("UserId", "Int32"),
1658+ new("Email", "String", maxLength: 50),
1659+ new("Name", "String", maxLength: 100),
1660+ new("Bio", "String"), // No max - will be calculated
1661+ }
1662+ };
1663+
1664+ var analysis = schema.AnalyzeSize();
1665+ Console.WriteLine($"Page size: {analysis.PageSize} bytes");
1666+ Console.WriteLine($"Fixed overhead: {analysis.FixedOverhead} bytes");
1667+ Console.WriteLine($"Available for strings: {analysis.AvailableForStrings} bytes");
1668+ Console.WriteLine();
1669+ foreach (var col in analysis.StringColumnLimits)
1670+ {
1671+ Console.WriteLine($"{col.Key}: max {col.Value} bytes");
1672+ }
1673+
1674+ // Output:
1675+ // Page size: 4096 bytes
1676+ // Fixed overhead: 58 bytes
1677+ // Available for strings: 3998 bytes
1678+ //
1679+ // Email: max 50 bytes
1680+ // Name: max 100 bytes
1681+ // Bio: max 3848 bytes (remaining: 3998 - 50 - 100)
1682+ ```
1683+
1684+ ### Practical Decision Tree
1685+
1686+ When designing your table:
1687+
1688+ ```
1689+ Do you have large strings?
1690+ │
1691+ ├─ NO (all < 1KB)
1692+ │ └─ Use 4KB page (default) ✅
1693+ │
1694+ ├─ YES, 1-5KB strings
1695+ │ └─ Use 8KB page
1696+ │
1697+ ├─ YES, 5-50KB strings
1698+ │ └─ Use 16KB page (only for strings that fit in a single page, i.e. under ~16KB) OR split into multiple records
1699+ │
1700+ └─ YES, > 50KB strings
1701+ └─ Use external storage (Phase 5 LOB feature)
1702+ OR split into multiple records
1703+ ```
1704+
1705+ ### Best Practices
1706+
1707+ **1. Always Calculate BEFORE Creating Table**
1708+
1709+ ```csharp
1710+ // BAD: Create table, then discover strings don't fit
1711+ var db = new SharpCoreDB();
1712+ var usersTable = db.CreateTable("Users");
1713+
1714+ // GOOD: Calculate first, then create
1715+ var analysis = new TableSchema { ... }.AnalyzeSize();
1716+ if (analysis.AvailableForStrings < expectedMaxStringSize)
1717+ {
1718+ // Use larger page size
1719+ }
1720+ ```
1721+
1722+ **2. Document Your Schema**
1723+
1724+ ```csharp
1725+ // Document the size constraints
1726+ public class UserRecord
1727+ {
1728+ public int UserId { get; set; }
1729+
1730+ /// <summary>
1731+ /// Email address. Max 50 bytes (typically 40-50 bytes for realistic emails).
1732+ /// </summary>
1733+ public string Email { get; set; }
1734+
1735+ /// <summary>
1736+ /// Full name. Max 100 bytes (typically 30-80 bytes for realistic names).
1737+ /// </summary>
1738+ public string Name { get; set; }
1739+
1740+ /// <summary>
1741+ /// Biography text. Max 3848 bytes (4KB page: 4056 usable bytes minus 208 bytes for the other columns and per-column headers).
1742+ /// If you need larger biographies, use external storage or increase page size to 8KB (7944 bytes).
1743+ /// </summary>
1744+ public string Bio { get; set; }
1745+ }
1746+ ```
1747+
1748+ **3. Add Validation**
1749+
1750+ ```csharp
1751+ // Validate before insert
1752+ public class User
1753+ {
1754+ private const int MaxBioBytes = 3848; // 4056 usable bytes (4KB page) - 208 bytes for other columns and headers
1755+
1756+ public void ValidateForInsert()
1757+ {
1758+ int bioBytes = Encoding.UTF8.GetByteCount(Bio ?? "");
1759+ if (bioBytes > MaxBioBytes)
1760+ {
1761+ throw new ArgumentException(
1762+ $"Bio exceeds max size: {bioBytes} > {MaxBioBytes} bytes");
1763+ }
1764+ }
1765+ }
1766+ ```
1767+
1768+ **4. Test Edge Cases**
1769+
1770+ ```csharp
1771+ [Fact]
1772+ public void InsertRecord_WithMaxSizeString_Should_Succeed()
1773+ {
1774+ var row = new Dictionary<string, object>
1775+ {
1776+ ["UserId"] = 1,
1777+ ["Email"] = "test@example.com",
1778+ ["Name"] = "John Doe",
1779+ ["Bio"] = new string('X', 3848), // Max size (4056 usable bytes - 208 bytes of schema overhead)
1780+ };
1781+
1782+ // Should succeed
1783+ usersTable.Insert(row);
1784+ }
1785+
1786+ [Fact]
1787+ public void InsertRecord_WithOversizeString_Should_Throw()
1788+ {
1789+ var row = new Dictionary<string, object>
1790+ {
1791+ ["UserId"] = 1,
1792+ ["Email"] = "test@example.com",
1793+ ["Name"] = "John Doe",
1794+ ["Bio"] = new string('X', 3849), // One byte over the 3848-byte max!
1795+ };
1796+
1797+ // Should throw InvalidOperationException
1798+ Assert.Throws<InvalidOperationException>(() => usersTable.Insert(row));
1799+ }
1800+ ```
1801+
1802+ ---
1803+
1804+
1805+
1806+
1807+
1808+
1809+
1810+
1811+
1812+
1813+
1814+
1815+
0 commit comments