Skip to content

Commit 8a4ab70

Browse files
authored
Merge pull request #171 from spreadsheetlab/restrict-row-pattern
Restrict row numbers in cell address
2 parents 892b72a + 801b22b commit 8a4ab70

2 files changed

Lines changed: 30 additions & 5 deletions

File tree

src/XLParser.Tests/ParserTests.cs

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,29 @@ public void CellReference()
214214
}
215215
}
216216

217+
[TestMethod]
218+
public void MaxRowAddress()
219+
{
220+
Test("A1048576", node => node.SkipToRelevant(true).Type() == GrammarNames.Cell);
221+
}
222+
223+
[TestMethod]
224+
public void InvalidRowAddress()
225+
{
226+
Test("A1048577", node => node.SkipToRelevant(true).Type() == GrammarNames.NamedRange);
227+
}
217228

229+
[TestMethod]
230+
public void MaxColumnAddress()
231+
{
232+
Test("XFD1", node => node.SkipToRelevant(true).Type() == GrammarNames.Cell);
233+
}
234+
235+
[TestMethod]
236+
public void InvalidColumnAddress()
237+
{
238+
Test("XFE1", node => node.SkipToRelevant(true).Type() == GrammarNames.NamedRange);
239+
}
218240

219241
[TestMethod]
220242
public void TestErrorCodeNull()
@@ -324,7 +346,7 @@ public void DoublePrefixedRange()
324346
[TestMethod]
325347
public void LongCellReference()
326348
{
327-
Test("Sheet2!A1234567");
349+
Test("Sheet2!A123456");
328350
}
329351

330352
[TestMethod]

src/XLParser/ExcelFormulaGrammar.cs

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -106,14 +106,15 @@ public class ExcelFormulaGrammar : Grammar
106106
#region References and names
107107

108108
private const string ColumnPattern = @"(?:[A-W][A-Z]{1,2}|X[A-E][A-Z]|XF[A-D]|[A-Z]{1,2})";
109+
private const string RowPattern = @"(?:104857[0-6]|10485[0-6][0-9]|1048[0-4][0-9]{2}|104[0-7][0-9]{3}|10[0-3][0-9]{4}|[1-9][0-9]{1,5}|[1-9])";
109110

110111
private static readonly string[] ColumnPrefix = Enumerable.Range('A', 'Z' - 'A' + 1).Select(c => char.ToString((char)c)).Concat(new[] { "$" }).ToArray();
111112
private static readonly string[] RowPrefix = Enumerable.Range('1', '9' - '1' + 1).Select(c => char.ToString((char)c)).Concat(new[] { "$" }).ToArray();
112113

113114
public Terminal VRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenVRange, "[$]?" + ColumnPattern + ":[$]?" + ColumnPattern, ColumnPrefix);
114-
public Terminal HRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenHRange, "[$]?[1-9][0-9]*:[$]?[1-9][0-9]*", RowPrefix);
115+
public Terminal HRangeToken { get; } = new RegexBasedTerminal(GrammarNames.TokenHRange, "[$]?" + RowPattern + ":[$]?" + RowPattern, RowPrefix);
115116

116-
private const string CellTokenRegex = "[$]?" + ColumnPattern + "[$]?[1-9][0-9]*";
117+
private const string CellTokenRegex = "[$]?" + ColumnPattern + "[$]?" + RowPattern;
117118
public Terminal CellToken { get; } = new RegexBasedTerminal(GrammarNames.TokenCell, CellTokenRegex, ColumnPrefix)
118119
{ Priority = TerminalPriority.CellToken };
119120

@@ -141,14 +142,16 @@ public class ExcelFormulaGrammar : Grammar
141142
// If we ever parse R1C1 references, make sure to include them here
142143
// TODO: Add all function names here
143144

144-
private const string NameInvalidWordsRegex =
145+
private const string NamedRangeCombinationRegex =
145146
"((TRUE|FALSE)" + NameValidCharacterRegex + "+)"
146147
// \w is equivalent to [\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Nd}\p{Pc}], we want the decimal left out here because otherwise "A11" would be a combination token
147148
+ "|(" + CellTokenRegex + @"[\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Lm}\p{Pc}\\_\.\?]" + NameValidCharacterRegex + "*)"
149+
// allow large cell references (e.g. A1048577) as named range
150+
// Row numbers above the 1048576 maximum: the 7-digit alternatives cover 1048577-9999999 and [0-9]{8,} covers any longer digit run (a bare "d" here would only match the literal letter d)
+ "|(" + ColumnPattern + @"(104857[7-9]|10485[89][0-9]|1048[6-9][0-9]{2}|1049[0-9]{3}|10[5-9][0-9]{4}|1[1-9][0-9]{5}|[2-9][0-9]{6}|[0-9]{8,})" + NameValidCharacterRegex + "*)"
148151
;
149152

150153
// To prevent e.g. "A1A1" being parsed as 2 cell tokens
151-
public Terminal NamedRangeCombinationToken { get; } = new RegexBasedTerminal(GrammarNames.TokenNamedRangeCombination, NameInvalidWordsRegex,
154+
public Terminal NamedRangeCombinationToken { get; } = new RegexBasedTerminal(GrammarNames.TokenNamedRangeCombination, NamedRangeCombinationRegex,
152155
ColumnPrefix.Concat(new[] { "T", "F" }).ToArray())
153156
{ Priority = TerminalPriority.NamedRangeCombination };
154157

0 commit comments

Comments
 (0)