Skip to content

Commit 11232a4

Browse files
committed
fixes for issue #16: pandas timestamp limitations
1 parent 704c0d9 commit 11232a4

4 files changed

Lines changed: 54 additions & 49 deletions

File tree

pyhecdss/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
__author__ = """Nicky Sandhu"""
22
__email__ = 'psandhu@water.ca.gov'
3-
__version__ = "0.3.2"
3+
__version__ = "0.4.0"
44
from .pyhecdss import *

pyhecdss/pyhecdss.py

Lines changed: 47 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,12 @@
44
import os
55
import time
66
import warnings
7+
from datetime import datetime, timedelta
8+
from calendar import monthrange
9+
from dateutil.parser import parse
710
# some static functions
811

12+
DATE_FMT_STR = '%d%b%Y'
913

1014
def set_message_level(level):
1115
"""
@@ -59,6 +63,11 @@ class DSSFile:
5963
}
6064
EPART_FREQ_MAP = {v: k for k, v in FREQ_EPART_MAP.items()}
6165
#
66+
"""
67+
Vectorized timedelta constructor: converts an array of minute counts into datetime.timedelta objects
68+
"""
69+
timedelta_minutes=np.vectorize(lambda x: timedelta(minutes=int(x)))
70+
6271

6372
def __init__(self, fname):
6473
self.isopen = False
@@ -216,13 +225,8 @@ def num_values_in_interval(self, sdstr, edstr, istr):
216225
Get number of values in interval istr, using the start date and end date
217226
string
218227
"""
219-
if istr.find('MON') >= 0: # less number of estimates will lead to overestimating values
220-
td = np.timedelta64(int(istr[:istr.find('MON')]), 'M')
221-
elif istr.find('YEAR') >= 0:
222-
td = np.timedelta64(int(istr[:istr.find('YEAR')]), 'Y')
223-
else:
224-
td = pd.to_timedelta(istr)
225-
return int((pd.to_datetime(edstr)-pd.to_datetime(sdstr))/td)+1
228+
td=DSSFile._get_timedelta_for_interval(istr)
229+
return int((parse(edstr)-parse(sdstr))/td)+1
226230

227231
def julian_day(self, date):
228232
"""
@@ -242,39 +246,38 @@ def m2ihm(self, minute):
242246
def parse_pathname_epart(self, pathname):
243247
return pathname.split('/')[1:7][4]
244248

245-
def _number_between(startDateStr, endDateStr, delta=np.timedelta64(1, 'D')):
249+
def _number_between(startDateStr, endDateStr, delta=timedelta(days=1)):
246250
"""
247251
This is only an estimate of the number of values to be read, so overestimating is acceptable.
248252
"""
249-
return round((pd.to_datetime(endDateStr)-pd.to_datetime(startDateStr))/delta+1)
250-
251-
def _get_timedelta_unit(epart):
252-
if 'YEAR' in epart:
253-
return 'Y'
254-
elif 'MON' in epart:
255-
return 'M'
256-
elif 'WEEK' in epart:
257-
return 'W'
258-
elif 'DAY' in epart:
259-
return 'D'
260-
elif 'HOUR' in epart:
261-
return 'H'
262-
elif 'MIN' in epart:
263-
return 'm'
253+
return round((parse(endDateStr)-parse(startDateStr))/delta+1)
254+
255+
def _get_timedelta_for_interval(interval):
256+
"""
257+
Get the minimum timedelta for the interval defined by the string, e.g. for a month it is 28 days (the shortest possible month).
258+
"""
259+
if interval.find('MON') >= 0: # using the shortest possible length yields more intervals, so the value count is overestimated rather than underestimated
260+
td = timedelta(days=28)
261+
elif interval.find('YEAR') >= 0:
262+
td = timedelta(days=365)
264263
else:
265-
raise Exception(
266-
"Unknown epart to time delta conversion for epart=%s" % epart)
264+
td = timedelta(seconds=DSSFile.EPART_FREQ_MAP[interval].nanos/1e9)
265+
return td
267266

268267
def _pad_to_end_of_block(self, endDateStr, interval):
268+
edate=parse(endDateStr)
269269
if interval.find('MON') >= 0 or interval.find('YEAR') >= 0:
270-
buffer = pd.DateOffset(years=10)
270+
edate=datetime((edate.year//10+1)*10,1,1)
271271
elif interval.find('DAY') >= 0:
272-
buffer = pd.DateOffset(years=1)
272+
edate=datetime(edate.year+1,1,1)
273273
elif interval.find('HOUR') >= 0 or interval.find('MIN') >= 0:
274-
buffer = pd.DateOffset(months=1)
274+
if edate.month == 12:
275+
edate=datetime(edate.year+1,1,1)
276+
else:
277+
edate=datetime(edate.year,edate.month+1,1)
275278
else:
276-
buffer = pd.DateOffset(days=1)
277-
return (pd.to_datetime(endDateStr) + buffer).strftime('%d%b%Y').upper()
279+
edate = edate+timedelta(days=1)
280+
return edate.strftime(DATE_FMT_STR).upper()
278281

279282
def _get_istat_for_zrrtsxd(self, istat):
280283
"""
@@ -352,9 +355,8 @@ def read_rts(self, pathname, startDateStr=None, endDateStr=None):
352355
endDateStr = edate.strip()
353356
endDateStr = self._pad_to_end_of_block(
354357
endDateStr, interval)
355-
nvals = self.num_values_in_interval(
356-
startDateStr, endDateStr, interval)
357-
sdate = pd.to_datetime(startDateStr)
358+
nvals = self.num_values_in_interval(startDateStr, endDateStr, interval)
359+
sdate = parse(startDateStr)
358360
cdate = sdate.date().strftime('%d%b%Y').upper()
359361
ctime = ''.join(sdate.time().isoformat().split(':')[:2])
360362
# PERF: could be np.empty if all initialized
@@ -369,9 +371,9 @@ def read_rts(self, pathname, startDateStr=None, endDateStr=None):
369371
# FIXME: handle a non-zero iofset for period data, i.e. the else branch of the if statement below
370372
freqoffset = DSSFile.EPART_FREQ_MAP[interval]
371373
if ctype.startswith('INST'):
372-
startDateWithOffset=pd.to_datetime(startDateStr)
374+
startDateWithOffset=parse(startDateStr)
373375
if iofset !=0:
374-
startDateWithOffset=pd.to_datetime(startDateStr)-freqoffset+pd.to_timedelta('%dT'%iofset)
376+
startDateWithOffset=parse(startDateStr)-freqoffset+timedelta(minutes=iofset)
375377
dindex = pd.date_range(
376378
startDateWithOffset, periods=nvals, freq=freqoffset)
377379
else:
@@ -430,10 +432,12 @@ def read_its(self, pathname, startDateStr=None, endDateStr=None, guess_vals_per_
430432
if startDateStr == None or endDateStr == None:
431433
raise Exception(
432434
"Either pathname D PART contains timewindow or specify in startDateStr and endDateStr for this call")
433-
startDateStr = (pd.to_datetime(startDateStr) -
434-
pd.offsets.YearBegin(0)).strftime('%d%b%Y').upper()
435-
endDateStr = (pd.to_datetime(endDateStr) +
436-
pd.offsets.YearBegin(0)).strftime('%d%b%Y').upper()
435+
nsdate = parse(startDateStr)
436+
nsbdate= datetime(nsdate.year,1,1)
437+
nedate = parse(endDateStr)
438+
nebdate = datetime(nedate.year,1,1)
439+
startDateStr = nsbdate.strftime(DATE_FMT_STR)
440+
endDateStr = nebdate.strftime(DATE_FMT_STR)
437441
parts[4] = startDateStr+" - "+endDateStr
438442
else:
439443
tw = list(map(lambda x: x.strip(), parts[4].split('-')))
@@ -443,8 +447,7 @@ def read_its(self, pathname, startDateStr=None, endDateStr=None, guess_vals_per_
443447
jule, istat = pyheclib.hec_datjul(endDateStr)
444448
ietime = istime = 0
445449
# guess how many values to be read based on e part approximation
446-
ktvals = DSSFile._number_between(startDateStr, endDateStr,
447-
np.timedelta64(1, DSSFile._get_timedelta_unit(epart)))
450+
ktvals = DSSFile._number_between(startDateStr, endDateStr, DSSFile._get_timedelta_for_interval(epart))
448451
ktvals = guess_vals_per_block*int(ktvals)
449452
kdvals = ktvals
450453
itimes = np.zeros(ktvals, 'i')
@@ -456,9 +459,8 @@ def read_its(self, pathname, startDateStr=None, endDateStr=None, guess_vals_per_
456459
if nvals == ktvals:
457460
raise Exception(
458461
"More values than guessed! %d. Call with guess_vals_per_block > 10000 " % ktvals)
459-
base_date = pd.to_datetime('31DEC1899')+pd.to_timedelta(ibdate, 'D')
460-
df = pd.DataFrame(dvalues[:nvals], index=pd.to_timedelta(
461-
itimes[:nvals], unit='m')+base_date, columns=[pathname])
462+
base_date = parse('31DEC1899')+timedelta(days=ibdate)
463+
df = pd.DataFrame(dvalues[:nvals], index=base_date+DSSFile.timedelta_minutes(itimes[:nvals]), columns=[pathname])
462464
return df, cunits.strip(), ctype.strip()
463465
# return nvals, dvalues, itimes, base_date, cunits, ctype
464466

@@ -498,7 +500,7 @@ def write_its(self, pathname, df, cunits, ctype, interval=None):
498500
jule, istat = pyheclib.hec_datjul(endDateStr)
499501
ietime = istime = 0
500502
pathname = "/".join(parts)
501-
itimes = df.index-pd.to_datetime(startDateStr)
503+
itimes = df.index-parse(startDateStr)
502504
itimes = itimes.total_seconds()/60 # time in minutes since base date juls
503505
itimes = itimes.values.astype('i') # convert to an integer numpy array
504506
inflag = 1 # replace data (merging should be done in memory)

tests/test_pyhecdss.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ def setupClass(cls):
1515
os.remove('./test_its1.dsd')
1616
os.remove('./test.dsc')
1717
os.remove('./test.dsd')
18+
os.remove('./test.dsc')
19+
os.remove('./test.dsd')
1820
os.remove('./test.dsk')
1921
@classmethod
2022
def tearDownClass(cls):

tests/test_pyhecdss_intrinsic.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33
import pandas as pd
44
import numpy as np
55
import pyhecdss
6+
from datetime import timedelta
67
def test_number_between():
7-
assert pyhecdss.DSSFile._number_between('01JAN2000','01FEB2000',delta=pd.to_timedelta(1,'D')) > 31
8-
assert pyhecdss.DSSFile._number_between('01JAN2000','01FEB2000',delta=np.timedelta64(1,'M')) > 1
9-
assert pyhecdss.DSSFile._number_between('01JAN2000','01FEB2000',delta=np.timedelta64(1,'Y')) > 0
8+
assert pyhecdss.DSSFile._number_between('01JAN2000','01FEB2000',delta=timedelta(days=1)) > 31
9+
assert pyhecdss.DSSFile._number_between('01JAN2000','01FEB2000',delta=timedelta(days=28)) > 1
10+
assert pyhecdss.DSSFile._number_between('01JAN2000','01FEB2000',delta=timedelta(days=365)) > 0

0 commit comments

Comments
 (0)