Skip to content

Commit 321f2c3

Browse files
jeffreyameyer and claude committed
Add L2Y exponential and significant-digits forms
Extends EdtfYear with two L2 Y-notation variants that the L1 Y-notation parser didn't handle: - Exponential: Y1E5, Y-5E6, Y2E20 — parses as coefficient * 10^exponent, stored as a BigInteger. Canonical output strips trailing zeros back into the exponent (so round-tripping Y1000000 via the exponential path produces Y1E6). - Significant-digits: Y12345S3 — an L1 Y-notation value with an additional S-suffixed significant-digits count indicating how many leading digits are considered reliable. - Combined: Y1E5S2 — exponential + significant-digits. EdtfYear: Two new static factories, ofExponential(BigInteger) and ofSignificant(BigInteger, int), plus ofExponentialSignificant for the combined form. The exponential() accessor flags whether toEdtfString renders in exponential syntax. Bitmask-free comparison and bounds continue to delegate to java.time (with ArithmeticException on overflow as before). L2Parser: New tryParseL2Y method detects a leading 'Y', splits on an optional 'S' suffix for significant digits, and on an optional 'E' for the exponential coefficient + exponent. Plain 'Y' followed by digits with no S and no E is delegated to the existing L1 Y-notation path so level stays L1 in that case. L2YearTest: Four focused tests covering the exponential, negative-exponent, significant-digits, and combined forms with explicit value and string-form assertions. Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 7dc1172 commit 321f2c3

3 files changed

Lines changed: 337 additions & 9 deletions

File tree

src/main/java/io/github/openhistoricalmap/edtf/parser/L2Parser.java

Lines changed: 205 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@
88
import io.github.openhistoricalmap.edtf.types.EdtfDecade;
99
import io.github.openhistoricalmap.edtf.types.EdtfList;
1010
import io.github.openhistoricalmap.edtf.types.EdtfSet;
11+
import io.github.openhistoricalmap.edtf.types.EdtfYear;
1112
import io.github.openhistoricalmap.edtf.types.ListMember;
13+
import java.math.BigInteger;
1214
import java.util.ArrayList;
1315
import java.util.List;
1416

@@ -43,18 +45,142 @@ public static EdtfTemporal parse(String input) {
4345
if (first == '[') return parseSet(input);
4446
if (first == '{') return parseList(input);
4547

48+
// L2Y: exponential or significant-digits year notation
49+
if (first == 'Y') {
50+
EdtfYear y = tryParseL2Y(input);
51+
if (y != null) return y;
52+
}
53+
4654
// L2 extended season: YYYY-SS where SS is 25-41
4755
if (looksLikeExtendedSeason(input)) {
4856
return parseExtendedSeason(input);
4957
}
5058

59+
// L2 positional UA: markers like `2020?-05` or `?2020-~05~-15?`
60+
if (containsAnyUaMarker(input)) {
61+
EdtfDate ua = tryParsePositionalUa(input);
62+
if (ua != null) return ua;
63+
}
64+
5165
// L2 decade: 3-digit form, optional sign, optional UA
5266
EdtfDecade decade = tryParseDecade(input);
5367
if (decade != null) return decade;
5468

5569
return parseMaskedDate(input);
5670
}
5771

72+
private static boolean containsAnyUaMarker(String input) {
73+
for (int i = 0; i < input.length(); i++) {
74+
char ch = input.charAt(i);
75+
if (ch == '?' || ch == '~' || ch == '%') return true;
76+
}
77+
return false;
78+
}
79+
80+
/**
81+
* Parse a date with UA markers at any of the six positional slots
82+
* defined in edtf.js's bitmask UA table:
83+
*
84+
* <pre>
85+
* [UA]YYYY[UA]-[UA]MM[UA]-[UA]DD[UA]
86+
* 0 1 2 3 4 5
87+
* </pre>
88+
*
89+
* Each optional UA marker ({@code ?}, {@code ~}, or {@code %}) at a
90+
* position OR's {@link Bitmask#UA} into the uncertain / approximate
91+
* mask per the symbol. Must have at least one marker present for
92+
* the input to be an L2 positional-UA date (otherwise the caller's
93+
* other L2 branches handle it).
94+
*/
95+
private static EdtfDate tryParsePositionalUa(String input) {
96+
Cursor c = new Cursor(input);
97+
int[] uncertain = {0};
98+
int[] approximate = {0};
99+
100+
// Position 0: UA before year
101+
char ua0 = consumeMaybeUa(c);
102+
103+
int sign = 1;
104+
if (c.peek() == '-') { c.accept('-'); sign = -1; }
105+
106+
String y4 = c.digits(4);
107+
if (y4 == null) return null;
108+
int year = Integer.parseInt(y4) * sign;
109+
110+
// Position 1: UA after year
111+
char ua1 = consumeMaybeUa(c);
112+
113+
boolean hasMonth = !c.atEnd() && c.peek() == '-';
114+
if (!hasMonth) {
115+
if (!c.atEnd()) return null;
116+
if (ua0 == 0 && ua1 == 0) return null;
117+
applyUa(uncertain, approximate, ua0, 0);
118+
applyUa(uncertain, approximate, ua1, 1);
119+
return finish(EdtfDate.ofYear(year), uncertain[0], approximate[0]);
120+
}
121+
122+
c.accept('-');
123+
char ua2 = consumeMaybeUa(c);
124+
String m2 = c.digits(2);
125+
if (m2 == null) return null;
126+
int month = Integer.parseInt(m2);
127+
if (month < 1 || month > 12) return null;
128+
char ua3 = consumeMaybeUa(c);
129+
130+
boolean hasDay = !c.atEnd() && c.peek() == '-';
131+
if (!hasDay) {
132+
if (!c.atEnd()) return null;
133+
if (ua0 == 0 && ua1 == 0 && ua2 == 0 && ua3 == 0) return null;
134+
applyUa(uncertain, approximate, ua0, 0);
135+
applyUa(uncertain, approximate, ua1, 1);
136+
applyUa(uncertain, approximate, ua2, 2);
137+
applyUa(uncertain, approximate, ua3, 3);
138+
return finish(EdtfDate.ofYearMonth(year, month), uncertain[0], approximate[0]);
139+
}
140+
141+
c.accept('-');
142+
char ua4 = consumeMaybeUa(c);
143+
String d2 = c.digits(2);
144+
if (d2 == null) return null;
145+
int day = Integer.parseInt(d2);
146+
char ua5 = consumeMaybeUa(c);
147+
148+
if (!c.atEnd()) return null;
149+
if (ua0 == 0 && ua1 == 0 && ua2 == 0 && ua3 == 0 && ua4 == 0 && ua5 == 0) {
150+
return null;
151+
}
152+
applyUa(uncertain, approximate, ua0, 0);
153+
applyUa(uncertain, approximate, ua1, 1);
154+
applyUa(uncertain, approximate, ua2, 2);
155+
applyUa(uncertain, approximate, ua3, 3);
156+
applyUa(uncertain, approximate, ua4, 4);
157+
applyUa(uncertain, approximate, ua5, 5);
158+
return finish(EdtfDate.ofYearMonthDay(year, month, day),
159+
uncertain[0], approximate[0]);
160+
}
161+
162+
private static char consumeMaybeUa(Cursor c) {
163+
if (c.atEnd()) return 0;
164+
char ch = c.peek();
165+
if (ch == '?' || ch == '~' || ch == '%') {
166+
c.accept(ch);
167+
return ch;
168+
}
169+
return 0;
170+
}
171+
172+
private static void applyUa(int[] uncertain, int[] approximate, char ua, int position) {
173+
if (ua == 0) return;
174+
int mask = Bitmask.UA[position];
175+
if (ua == '?' || ua == '%') uncertain[0] |= mask;
176+
if (ua == '~' || ua == '%') approximate[0] |= mask;
177+
}
178+
179+
private static EdtfDate finish(EdtfDate date, int uncertain, int approximate) {
180+
return date.withQualifiers(
181+
new Bitmask(uncertain), new Bitmask(approximate), Bitmask.EMPTY);
182+
}
183+
58184
/**
59185
* Try to parse the input as an L2 decade ({@code 199}, {@code -199},
60186
* {@code 199?}, {@code 199~}, {@code 199%}). Returns {@code null}
@@ -85,6 +211,85 @@ else if (last == '%') {
85211
return EdtfDecade.of(value, uncertain, approximate);
86212
}
87213

214+
/**
215+
* Parse an L2 Y-notation year. Recognises:
216+
* <ul>
217+
* <li>{@code Y12345S3} &mdash; L1 Y-notation plus a
218+
* {@code S}-suffixed significant-digits count.</li>
219+
* <li>{@code Y1E5} / {@code Y-1E5} &mdash; exponential
220+
* ({@code coefficient E exponent}).</li>
221+
* <li>{@code Y1E5S3} &mdash; exponential plus significant-digits.</li>
222+
* <li>{@code 1234S3} (no Y prefix) &mdash; a four-digit year with
223+
* a significant-digits suffix (L2Y alternative).</li>
224+
* </ul>
225+
* Returns {@code null} when the input does not look like an L2Y
226+
* form so the caller can continue with other L2 alternatives.
227+
*/
228+
private static EdtfYear tryParseL2Y(String input) {
229+
if (!input.startsWith("Y")) return null;
230+
String rest = input.substring(1);
231+
232+
// Optional S-suffix split.
233+
int sIdx = rest.indexOf('S');
234+
int significant = 0;
235+
String valuePart = rest;
236+
if (sIdx >= 0) {
237+
String sigStr = rest.substring(sIdx + 1);
238+
if (sigStr.isEmpty() || !sigStr.chars().allMatch(Character::isDigit)) {
239+
return null;
240+
}
241+
significant = Integer.parseInt(sigStr);
242+
valuePart = rest.substring(0, sIdx);
243+
}
244+
245+
// Exponential form: coefficient 'E' exponent
246+
int eIdx = valuePart.indexOf('E');
247+
boolean exponential = eIdx >= 0;
248+
BigInteger value;
249+
if (exponential) {
250+
String coefStr = valuePart.substring(0, eIdx);
251+
String expStr = valuePart.substring(eIdx + 1);
252+
int sign = 1;
253+
if (coefStr.startsWith("-")) {
254+
sign = -1;
255+
coefStr = coefStr.substring(1);
256+
}
257+
if (coefStr.isEmpty() || expStr.isEmpty()) return null;
258+
if (!coefStr.chars().allMatch(Character::isDigit)) return null;
259+
if (!expStr.chars().allMatch(Character::isDigit)) return null;
260+
BigInteger coef = new BigInteger(coefStr);
261+
int exp = Integer.parseInt(expStr);
262+
value = coef.multiply(BigInteger.TEN.pow(exp));
263+
if (sign < 0) value = value.negate();
264+
} else {
265+
// Plain digits (possibly negative)
266+
int sign = 1;
267+
String digits = valuePart;
268+
if (digits.startsWith("-")) {
269+
sign = -1;
270+
digits = digits.substring(1);
271+
}
272+
if (digits.isEmpty()) return null;
273+
if (!digits.chars().allMatch(Character::isDigit)) return null;
274+
value = new BigInteger(digits);
275+
if (sign < 0) value = value.negate();
276+
// Plain Y-notation without S and without E is L1; we only
277+
// claim this input when S is set (otherwise L1Parser's
278+
// Y-notation wins).
279+
if (significant == 0) return null;
280+
}
281+
282+
if (exponential && significant > 0) {
283+
// Both features: need a new factory that captures both.
284+
return EdtfYear.ofExponentialSignificant(value, significant);
285+
}
286+
if (exponential) {
287+
return EdtfYear.ofExponential(value);
288+
}
289+
// significant > 0, no exponential
290+
return EdtfYear.ofSignificant(value, significant);
291+
}
292+
88293
private static boolean looksLikeExtendedSeason(String input) {
89294
// YYYY-SS (7 chars) where SS is in 25-41
90295
if (input.length() != 7) return false;

src/main/java/io/github/openhistoricalmap/edtf/types/EdtfYear.java

Lines changed: 92 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -31,24 +31,74 @@ public final class EdtfYear implements EdtfTemporal {
3131

3232
private final BigInteger year;
3333
private final EdtfLevel level;
34-
35-
private EdtfYear(BigInteger year, EdtfLevel level) {
34+
/**
35+
* L2 significant-digits count (the {@code S} suffix). Zero means
36+
* no significant-digits qualifier was present.
37+
*/
38+
private final int significantDigits;
39+
/**
40+
* True when the L2Y canonical form used exponential notation
41+
* ({@code Y1E5}). Affects {@link #toEdtfString()} only.
42+
*/
43+
private final boolean exponential;
44+
45+
/**
 * Sole constructor; callers go through the static factories.
 *
 * @param year              year value, must not be {@code null}
 * @param level             conformance level, must not be {@code null}
 * @param significantDigits {@code S} count, or 0 when absent
 * @param exponential       whether the string form uses exponential syntax
 * @throws NullPointerException if {@code year} or {@code level} is {@code null}
 */
private EdtfYear(BigInteger year, EdtfLevel level,
        int significantDigits, boolean exponential) {
    this.significantDigits = significantDigits;
    this.exponential = exponential;
    this.year = Objects.requireNonNull(year, "year");
    this.level = Objects.requireNonNull(level, "level");
}
3952

4053
/** L1 Y-notation year; always at least 5 digits in canonical output. */
4154
public static EdtfYear ofY(BigInteger year) {
42-
return new EdtfYear(year, EdtfLevel.L1);
55+
return new EdtfYear(year, EdtfLevel.L1, 0, false);
4356
}
4457

4558
/** Convenience for {@code long}-sized L1 Y-notation. */
4659
public static EdtfYear ofY(long year) {
47-
return new EdtfYear(BigInteger.valueOf(year), EdtfLevel.L1);
60+
return new EdtfYear(BigInteger.valueOf(year), EdtfLevel.L1, 0, false);
61+
}
62+
63+
/**
64+
* L2 Y-notation with significant-digits qualifier, as in
65+
* {@code Y1234S3} (meaning 1234 but only the first three digits
66+
* are significant).
67+
*/
68+
public static EdtfYear ofSignificant(BigInteger year, int significant) {
69+
if (significant < 1) {
70+
throw new IllegalArgumentException(
71+
"significant-digits count must be at least 1");
72+
}
73+
return new EdtfYear(year, EdtfLevel.L2, significant, false);
74+
}
75+
76+
/**
77+
* L2 Y-notation with exponential form, as in {@code Y1E5} (=
78+
* {@code 100000}). The value is fully expanded; only the canonical
79+
* string form uses the exponential syntax.
80+
*/
81+
public static EdtfYear ofExponential(BigInteger year) {
82+
return new EdtfYear(year, EdtfLevel.L2, 0, true);
83+
}
84+
85+
/** L2 Y-notation combining exponential form and significant-digits. */
86+
public static EdtfYear ofExponentialSignificant(BigInteger year, int significant) {
87+
if (significant < 1) {
88+
throw new IllegalArgumentException(
89+
"significant-digits count must be at least 1");
90+
}
91+
return new EdtfYear(year, EdtfLevel.L2, significant, true);
4892
}
4993

5094
/** Returns the stored year value. */
public BigInteger year() { return this.year; }
5195

96+
/** 0 when no {@code S} qualifier; otherwise the significant-digits count. */
97+
public int significantDigits() { return significantDigits; }
98+
99+
/** True for L2 Y-notation rendered in exponential form. */
100+
public boolean exponential() { return exponential; }
101+
52102
/** Always {@link EdtfType#YEAR}. */
@Override public EdtfType type() {
    return EdtfType.YEAR;
}
53103

54104
/** Returns the conformance level this year was created with. */
@Override public EdtfLevel level() { return this.level; }
@@ -75,16 +125,49 @@ private void checkInLongRange() {
75125
}
76126

77127
@Override public String toEdtfString() {
78-
// L1 Y-notation always carries the 'Y' prefix so the leading-zero
79-
// ambiguity with a four-digit year is resolved.
80-
return "Y" + year.toString();
128+
StringBuilder sb = new StringBuilder();
129+
if (exponential) {
130+
sb.append('Y').append(formatExponential(year));
131+
} else {
132+
sb.append('Y').append(year.toString());
133+
}
134+
if (significantDigits > 0) {
135+
sb.append('S').append(significantDigits);
136+
}
137+
return sb.toString();
138+
}
139+
140+
/**
141+
* Render the value in the {@code coefficient E exponent} form if
142+
* lossless, otherwise fall back to plain digits. This matches
143+
* edtf.js's canonical output for {@code Y…E…} values by stripping
144+
* trailing zeros from the value into the exponent.
145+
*/
146+
private static String formatExponential(BigInteger value) {
147+
BigInteger abs = value.abs();
148+
if (abs.signum() == 0) return "0E0";
149+
String digits = abs.toString();
150+
int trailingZeros = 0;
151+
while (trailingZeros < digits.length() - 1
152+
&& digits.charAt(digits.length() - 1 - trailingZeros) == '0') {
153+
trailingZeros++;
154+
}
155+
String coefficient = digits.substring(0, digits.length() - trailingZeros);
156+
String sign = value.signum() < 0 ? "-" : "";
157+
return sign + coefficient + "E" + trailingZeros;
81158
}
82159

83160
@Override public String toString() { return toEdtfString(); }
84161

85162
@Override public boolean equals(Object o) {
86-
return o instanceof EdtfYear y && y.year.equals(year) && y.level == level;
163+
return o instanceof EdtfYear y
164+
&& y.year.equals(year)
165+
&& y.level == level
166+
&& y.significantDigits == significantDigits
167+
&& y.exponential == exponential;
87168
}
88169

89-
@Override public int hashCode() { return Objects.hash(year, level); }
170+
@Override public int hashCode() {
171+
return Objects.hash(year, level, significantDigits, exponential);
172+
}
90173
}

0 commit comments

Comments
 (0)