Skip to content

Commit d5b3c60

Browse files
Cleanup handling of TZ name parsing
Fully support the format where a TZ name appears in parentheses after the time (and possibly after an offset). This fixes the broken case where a 4-character TZ name was in parentheses after a time.
1 parent c4de5d4 commit d5b3c60

2 files changed

Lines changed: 109 additions & 70 deletions

File tree

parseany.go

Lines changed: 95 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -1895,9 +1895,8 @@ iterRunes:
18951895
if !p.setYear() {
18961896
return p, p.unknownErr(datestr)
18971897
}
1898-
} else {
1899-
// allow multiple trailing whitespace
19001898
}
1899+
// else allow multiple trailing whitespace
19011900
case '+', '-':
19021901
// The year must be followed by a space before an offset!
19031902
if p.yearlen > 0 {
@@ -1942,12 +1941,10 @@ iterRunes:
19421941
} else {
19431942
p.tzlen = i - p.tzi
19441943
}
1945-
if p.tzlen == 4 {
1946-
p.set(p.tzi, " MST")
1947-
} else if p.tzlen == 3 {
1948-
p.set(p.tzi, "MST")
1949-
} else if p.tzlen > 0 {
1950-
return p, p.unknownErr(datestr)
1944+
if p.tzlen > 0 {
1945+
if err := p.setTZName(datestr); err != nil {
1946+
return p, err
1947+
}
19511948
}
19521949
p.stateTime = timeWsAlphaZoneOffset
19531950
p.offseti = i
@@ -1956,12 +1953,8 @@ iterRunes:
19561953
// 17:57:51 MST
19571954
// 06:20:00 (EST)
19581955
p.tzlen = i - p.tzi
1959-
if p.tzlen == 4 {
1960-
p.set(p.tzi, " MST")
1961-
} else if p.tzlen == 3 {
1962-
p.set(p.tzi, "MST")
1963-
} else if p.tzlen > 0 {
1964-
return p, p.unknownErr(datestr)
1956+
if err := p.setTZName(datestr); err != nil {
1957+
return p, err
19651958
}
19661959
if r == ' ' {
19671960
p.stateTime = timeWsAlphaWs
@@ -2205,19 +2198,8 @@ iterRunes:
22052198
case r == ' ':
22062199
if p.tzi > 0 {
22072200
p.tzlen = i - p.tzi
2208-
switch p.tzlen {
2209-
case 3:
2210-
// 13:31:51.999 +01:00 CET
2211-
p.set(p.tzi, "MST")
2212-
case 4:
2213-
// 13:31:51.999 +01:00 CEST
2214-
p.set(p.tzi, "MST ")
2215-
default:
2216-
if p.simpleErrorMessages {
2217-
return p, ErrUnknownTimeZone
2218-
} else {
2219-
return p, fmt.Errorf("%w %q near %q (must be 3 or 4 characters)", ErrUnknownTimeZone, datestr, p.datestr[p.tzi:p.tzi+p.tzlen])
2220-
}
2201+
if err := p.setTZName(datestr); err != nil {
2202+
return p, err
22212203
}
22222204
} else {
22232205
return p, p.unknownErr(datestr)
@@ -2353,18 +2335,9 @@ iterRunes:
23532335

23542336
switch p.stateTime {
23552337
case timeWsAlpha:
2356-
switch len(p.datestr) - p.tzi {
2357-
case 3:
2358-
// 13:31:51.999 +01:00 CET
2359-
p.set(p.tzi, "MST")
2360-
case 4:
2361-
p.set(p.tzi, "MST ")
2362-
default:
2363-
if p.simpleErrorMessages {
2364-
return p, ErrUnknownTimeZone
2365-
} else {
2366-
return p, fmt.Errorf("%w %q near %q (must be 3 or 4 characters)", ErrUnknownTimeZone, datestr, p.datestr[p.tzi:])
2367-
}
2338+
p.tzlen = i - p.tzi
2339+
if err := p.setTZName(datestr); err != nil {
2340+
return p, err
23682341
}
23692342

23702343
case timeWsAlphaRParen:
@@ -2377,10 +2350,26 @@ iterRunes:
23772350
}
23782351
case timeWsOffsetWsTZDescInParen:
23792352
// The last character must be a closing ')'
2380-
if len(p.datestr) <= 0 || p.datestr[i-1] != ')' {
2353+
if i <= 0 || p.datestr[i-1] != ')' {
23812354
return p, p.unknownErr(datestr)
23822355
}
2383-
p.trimExtra(false)
2356+
// As a special case, if we don't yet have a timezone name,
2357+
// and the content in the paren is 3-4 characters, then treat
2358+
// this as a time zone name instead
2359+
if len(p.datestr) >= p.extra+1+3+1 {
2360+
parenContentsLen := (i - 1) - (p.extra + 2)
2361+
if p.tzi == 0 && (parenContentsLen >= 3 && parenContentsLen <= 4) {
2362+
p.tzi = p.extra + 2
2363+
p.tzlen = parenContentsLen
2364+
if err := p.setTZName(datestr); err != nil {
2365+
return p, err
2366+
}
2367+
p.extra = 0
2368+
}
2369+
}
2370+
if p.extra > 0 {
2371+
p.trimExtra(false)
2372+
}
23842373
case timeWsAlphaZoneOffset:
23852374
// 06:20:00 UTC-05
23862375
if err := p.setTZOffset(i, datestr); err != nil {
@@ -2418,19 +2407,9 @@ iterRunes:
24182407
case timeWsOffsetWsAlphaZone:
24192408
// 00:12:00 +0000 UTC
24202409
if p.tzi > 0 {
2421-
switch len(p.datestr) - p.tzi {
2422-
case 3:
2423-
// 13:31:51.999 +01:00 CET
2424-
p.set(p.tzi, "MST")
2425-
case 4:
2426-
// 13:31:51.999 +01:00 CEST
2427-
p.set(p.tzi, "MST ")
2428-
default:
2429-
if p.simpleErrorMessages {
2430-
return p, ErrUnknownTimeZone
2431-
} else {
2432-
return p, fmt.Errorf("%w %q near %q (must be 3 or 4 characters)", ErrUnknownTimeZone, datestr, p.datestr[p.tzi:])
2433-
}
2410+
p.tzlen = i - p.tzi
2411+
if err := p.setTZName(datestr); err != nil {
2412+
return p, err
24342413
}
24352414
} else {
24362415
return p, p.unknownErr(datestr)
@@ -2940,6 +2919,44 @@ func (p *parser) setTZOffset(i int, datestr string) error {
29402919
return nil
29412920
}
29422921

2922+
func (p *parser) setTZName(datestr string) error {
2923+
switch p.tzlen {
2924+
case 3:
2925+
p.set(p.tzi, "MST")
2926+
case 4:
2927+
p.set(p.tzi, "MST ")
2928+
default:
2929+
if p.simpleErrorMessages {
2930+
return ErrUnknownTimeZone
2931+
} else {
2932+
return fmt.Errorf("%w %q near %q (must be 3 or 4 characters)", ErrUnknownTimeZone, datestr, p.datestr[p.tzi:p.tzi+p.tzlen])
2933+
}
2934+
}
2935+
return nil
2936+
}
2937+
2938+
// Removes the characters at the given range from the format string.
2939+
// Fills the end of the format string with spaces rather than shortening it.
2940+
func (p *parser) removeRangeFromFormat(i, numBytes int) {
2941+
if i < 0 || i >= len(p.format) {
2942+
return
2943+
}
2944+
var startErase int
2945+
afterRemovedRange := i + numBytes
2946+
bytesToCopy := len(p.format) - afterRemovedRange
2947+
if bytesToCopy <= 0 {
2948+
// nothing to copy, erase everything from the removal point
2949+
startErase = i
2950+
} else {
2951+
copy(p.format[i:], p.format[afterRemovedRange:])
2952+
startErase = i + bytesToCopy
2953+
}
2954+
// fill in spaces to erase the moved content in its old location
2955+
for index := startErase; index < len(p.format); index++ {
2956+
p.format[index] = ' '
2957+
}
2958+
}
2959+
29432960
// Find the proper end of the current component (scanning chars starting from start and going
29442961
// up until the end, and either returning at end or returning the first character that is
29452962
// not allowed, as determined by allowNumeric, allowAlpha, and allowOther)
@@ -3097,6 +3114,26 @@ func (p *parser) parse(originalLoc *time.Location, originalOpts ...ParserOption)
30973114
if p.t != nil {
30983115
return *p.t, nil
30993116
}
3117+
3118+
// Make sure that the entire string matched to a known format that was detected
3119+
if !p.allowPartialStringMatch && p.formatSetLen < len(p.format) {
3120+
// We can always ignore punctuation at the end of a date/time, but do not allow
3121+
// any numbers or letters in the format string.
3122+
validFormatTo := findProperEnd(bytesToString(p.format), p.formatSetLen, len(p.format), false, false, true)
3123+
if validFormatTo < len(p.format) {
3124+
return time.Time{}, p.unexpectedTail(p.formatSetLen)
3125+
}
3126+
}
3127+
3128+
// Special case where the TZ name is 4 characters long and followed by punctuation, will cause parsing problems
3129+
// with the format 'MST ' (will expect a whitespace that isn't there after 4 char timezone). Most robust
3130+
// solution is to remove the extra whitespace. Even though it will cause offsets after this point to not match
3131+
// between the datestr and format string, it's not an issue at this point.
3132+
if p.tzlen == 4 && p.tzi+4 < len(p.format) && p.format[p.tzi+3] == ' ' && p.format[p.tzi+4] != ' ' {
3133+
p.removeRangeFromFormat(p.tzi+3, 1)
3134+
}
3135+
3136+
// If we have a full month name, update the format string to use it (can change length of format string)
31003137
if len(p.fullMonth) > 0 {
31013138
p.setFullMonth(p.fullMonth)
31023139
}
@@ -3110,7 +3147,7 @@ func (p *parser) parse(originalLoc *time.Location, originalOpts ...ParserOption)
31103147
// get out of this function to reduce scope it needs to be applied on
31113148
if err != nil && strings.Contains(err.Error(), "month out of range") {
31123149
// simple optimized case where mm and dd can be swapped directly
3113-
if p.molen == 2 && p.daylen == 2 {
3150+
if p.molen == 2 && p.daylen == 2 && len(p.fullMonth) <= 0 && (p.tzi == 0 || (p.moi < p.tzi && p.dayi < p.tzi)) {
31143151
// skipped bytes have already been removed, so compensate for that
31153152
moi := p.moi - p.skip
31163153
p.moi = p.dayi - p.skip
@@ -3144,17 +3181,10 @@ func (p *parser) parse(originalLoc *time.Location, originalOpts ...ParserOption)
31443181
}()
31453182
}
31463183

3147-
// Make sure that the entire string matched to a known format that was detected
3148-
if !p.allowPartialStringMatch && p.formatSetLen < len(p.format) {
3149-
// We can always ignore punctuation at the end of a date/time, but do not allow
3150-
// any numbers or letters in the format string.
3151-
validFormatTo := findProperEnd(bytesToString(p.format), p.formatSetLen, len(p.format), false, false, true)
3152-
if validFormatTo < len(p.format) {
3153-
return time.Time{}, p.unexpectedTail(p.formatSetLen)
3154-
}
3184+
if p.skip > len(p.format) {
3185+
p.skip = len(p.format)
31553186
}
3156-
3157-
if p.skip > 0 && len(p.format) > p.skip {
3187+
if p.skip > 0 {
31583188
// copy and then re-slice to shorten to avoid losing the header of the pooled format string
31593189
copy(p.format, p.format[p.skip:])
31603190
p.format = p.format[:len(p.format)-p.skip]

parseany_test.go

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -225,9 +225,11 @@ var testInputs = []dateTest{
225225
{in: "Thu, 03 Jul 2017 8:08:04 +0100", out: "2017-07-03 07:08:04 +0000 UTC"},
226226
{in: "Thu, 03 Jul 2017 8:8:4 +0100", out: "2017-07-03 07:08:04 +0000 UTC"},
227227
//
228-
{in: "Tue, 11 Jul 2017 04:08:03 +0200 (CEST)", out: "2017-07-11 02:08:03 +0000 UTC"},
229-
{in: "Tue, 5 Jul 2017 04:08:03 -0700 (MST)", out: "2017-07-05 11:08:03 +0000 UTC"},
228+
{in: "Tue, 11 Jul 2017 04:08:03 +0200 (CEST)", out: "2017-07-11 02:08:03 +0000 UTC", zname: "CEST"},
229+
{in: "Tue, 5 Jul 2017 04:08:03 -0700 (MST)", out: "2017-07-05 11:08:03 +0000 UTC", zname: "MST"},
230230
{in: "Tue, 11 Jul 2017 04:08:03 +0200 (CEST)", out: "2017-07-11 02:08:03 +0000 UTC", loc: "Europe/Berlin", zname: "CEST"},
231+
{in: "Tue, 11 Jul 2017 04:08:03 (CEST)", out: "2017-07-11 04:08:03 +0000 UTC", zname: "CEST"},
232+
{in: "Tue, 5 Jul 2017 04:08:03 (MST)", out: "2017-07-05 04:08:03 +0000 UTC", zname: "MST"},
231233
// day, dd-Mon-yy hh:mm:zz TZ
232234
{in: "Fri, 03-Jul-15 08:08:08 MST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "MST"},
233235
{in: "Fri, 03-Jul-15 08:08:08 CEST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "CEST"},
@@ -330,14 +332,18 @@ var testInputs = []dateTest{
330332
{in: "04/02/2014 04:08:09 AM", out: "2014-04-02 04:08:09 +0000 UTC"},
331333
{in: "04/02/2014 04:08:09AM PST", out: "2014-04-02 04:08:09 +0000 UTC", zname: "PST"},
332334
{in: "04/02/2014 04:08:09 AM PST", out: "2014-04-02 04:08:09 +0000 UTC", zname: "PST"},
335+
{in: "04/02/2014 04:08:09 AM (PST)", out: "2014-04-02 04:08:09 +0000 UTC", zname: "PST"},
333336
{in: "04/02/2014 04:08:09AM CEST", out: "2014-04-02 04:08:09 +0000 UTC", zname: "CEST"},
334337
{in: "04/02/2014 04:08:09 AM CEST", out: "2014-04-02 04:08:09 +0000 UTC", zname: "CEST"},
338+
{in: "04/02/2014 04:08:09 AM (CEST)", out: "2014-04-02 04:08:09 +0000 UTC", zname: "CEST"},
335339
{in: "04/02/2014 04:08:09pm", out: "2014-04-02 16:08:09 +0000 UTC"},
336340
{in: "04/02/2014 04:08:09 PM", out: "2014-04-02 16:08:09 +0000 UTC"},
337341
{in: "04/02/2014 04:08:09PM PST", out: "2014-04-02 16:08:09 +0000 UTC", zname: "PST"},
338342
{in: "04/02/2014 04:08:09 PM PST", out: "2014-04-02 16:08:09 +0000 UTC", zname: "PST"},
343+
{in: "04/02/2014 04:08:09 PM (PST)", out: "2014-04-02 16:08:09 +0000 UTC", zname: "PST"},
339344
{in: "04/02/2014 04:08:09pm CEST", out: "2014-04-02 16:08:09 +0000 UTC", zname: "CEST"},
340345
{in: "04/02/2014 04:08:09 PM CEST", out: "2014-04-02 16:08:09 +0000 UTC", zname: "CEST"},
346+
{in: "04/02/2014 04:08:09 PM (CEST)", out: "2014-04-02 16:08:09 +0000 UTC", zname: "CEST"},
341347
{in: "04/02/2014 04:08am", out: "2014-04-02 04:08:00 +0000 UTC"},
342348
{in: "04/02/2014 04:08 AM", out: "2014-04-02 04:08:00 +0000 UTC"},
343349
{in: "04/02/2014 04:08pm", out: "2014-04-02 16:08:00 +0000 UTC"},
@@ -822,7 +828,7 @@ func TestParse(t *testing.T) {
822828
}
823829
fullInput := prefix + th.in
824830

825-
t.Run(fmt.Sprintf("simpleerr-%v-addweekday-%v-%s", simpleErrorMessage, addWeekday, fullInput), func(t *testing.T) {
831+
t.Run(fmt.Sprintf("simpleerr-%v/addweekday-%v/%s", simpleErrorMessage, addWeekday, fullInput), func(t *testing.T) {
826832
var ts time.Time
827833
defer func() {
828834
if r := recover(); r != nil {
@@ -1167,6 +1173,9 @@ func TestInLocation(t *testing.T) {
11671173
ts = MustParse("Tue, 5 Jul 2017 16:28:13 -0700 (MST)")
11681174
assert.Equal(t, "2017-07-05 23:28:13 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC)))
11691175

1176+
ts = MustParse("Tue, 5 Jul 2017 16:28:13 +0300 (CEST)")
1177+
assert.Equal(t, "2017-07-05 13:28:13 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC)))
1178+
11701179
// Now we are going to use ParseIn() and see that it gives different answer
11711180
// with different zone, offset
11721181
time.Local = nil
@@ -1311,6 +1320,6 @@ func TestRetryAmbiguousDateWithSwap(t *testing.T) {
13111320

13121321
// Convenience function for debugging a particular broken test case
13131322
func TestDebug(t *testing.T) {
1314-
ts := MustParse("Monday 19/03/2012 00:00:00", RetryAmbiguousDateWithSwap(true))
1315-
assert.Equal(t, "2012-03-19 00:00:00 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC)))
1323+
ts := MustParse("September 17, 2012 at 10:09am CEST+02", RetryAmbiguousDateWithSwap(true))
1324+
assert.Equal(t, "2012-09-17 08:09:00 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC)))
13161325
}

0 commit comments

Comments
 (0)