44import os
55import time
66import warnings
7+ from datetime import datetime , timedelta
8+ from calendar import monthrange
9+ from dateutil .parser import parse
710# some static functions
811
12+ DATE_FMT_STR = '%d%b%Y'
913
1014def set_message_level (level ):
1115 """
@@ -59,6 +63,11 @@ class DSSFile:
5963 }
6064 EPART_FREQ_MAP = {v : k for k , v in FREQ_EPART_MAP .items ()}
6165 #
66+ """
67+ vectorized version of timedelta
68+ """
69+ timedelta_minutes = np .vectorize (lambda x : timedelta (minutes = int (x )))
70+
6271
6372 def __init__ (self , fname ):
6473 self .isopen = False
@@ -216,13 +225,8 @@ def num_values_in_interval(self, sdstr, edstr, istr):
216225 Get number of values in interval istr, using the start date and end date
217226 string
218227 """
219- if istr .find ('MON' ) >= 0 : # less number of estimates will lead to overestimating values
220- td = np .timedelta64 (int (istr [:istr .find ('MON' )]), 'M' )
221- elif istr .find ('YEAR' ) >= 0 :
222- td = np .timedelta64 (int (istr [:istr .find ('YEAR' )]), 'Y' )
223- else :
224- td = pd .to_timedelta (istr )
225- return int ((pd .to_datetime (edstr )- pd .to_datetime (sdstr ))/ td )+ 1
228+ td = DSSFile ._get_timedelta_for_interval (istr )
229+ return int ((parse (edstr )- parse (sdstr ))/ td )+ 1
226230
227231 def julian_day (self , date ):
228232 """
@@ -242,39 +246,38 @@ def m2ihm(self, minute):
def parse_pathname_epart(self, pathname):
    """
    Return the E part of a '/'-delimited pathname.

    The text before the leading '/' is discarded, the next six fields
    are kept, and the fifth of those is returned.
    Raises IndexError when the pathname has too few fields.
    """
    six_parts = pathname.split('/')[1:7]
    return six_parts[4]
244248
def _number_between(startDateStr, endDateStr, delta=timedelta(days=1)):
    """
    Guess how many values spaced `delta` apart lie between two date strings.

    This is only an estimate of the number of values to be read, so
    going over is ok. Both strings are parsed with dateutil's parse().
    """
    span = parse(endDateStr) - parse(startDateStr)
    return round(span / delta + 1)
254+
def _get_timedelta_for_interval(interval):
    """
    Get the minimum timedelta for the interval named by an E-part string.

    Calendar intervals have no fixed length, so the shortest possible
    span is used (28 days for a month, 365 days for a year). A shorter
    delta can only overestimate the number of values, which is safe.

    Other intervals are looked up in DSSFile.EPART_FREQ_MAP. When the
    string is not a key of that map, or the mapped offset has no fixed
    length, the unit is derived from the string itself (WEEK, DAY, HOUR,
    MIN, with an optional leading count such as '15MIN').

    :param interval: E-part string, e.g. '1MON', '1DAY', '15MIN'
    :return: datetime.timedelta for one interval
    :raises KeyError: if no time delta can be derived from the string
    """
    if 'MON' in interval:
        return timedelta(days=28)
    if 'YEAR' in interval:
        return timedelta(days=365)
    try:
        return timedelta(seconds=DSSFile.EPART_FREQ_MAP[interval].nanos / 1e9)
    except (KeyError, ValueError):
        # KeyError: interval is not a key of the map.
        # ValueError: pandas raises it from .nanos for non-fixed offsets.
        pass
    # Optional leading count; anything without one counts as a single unit.
    digits = interval[:len(interval) - len(interval.lstrip('0123456789'))]
    count = max(int(digits), 1) if digits else 1
    unit_deltas = (
        ('WEEK', timedelta(weeks=1)),
        ('DAY', timedelta(days=1)),
        ('HOUR', timedelta(hours=1)),
        ('MIN', timedelta(minutes=1)),
    )
    for token, unit in unit_deltas:
        if token in interval:
            return count * unit
    raise KeyError(
        "Unknown epart to time delta conversion for epart=%s" % interval)
267266
def _pad_to_end_of_block(self, endDateStr, interval):
    """
    Push an end date string forward, by an amount chosen from the interval.

    * interval contains MON or YEAR: 1 January of the next year that is
      a multiple of ten
    * interval contains DAY: 1 January of the following year
    * interval contains HOUR or MIN: first day of the following month
    * anything else: one day later

    Returns the padded date formatted with DATE_FMT_STR, upper-cased.
    """
    end = parse(endDateStr)
    if 'MON' in interval or 'YEAR' in interval:
        next_decade = (end.year // 10 + 1) * 10
        padded = datetime(next_decade, 1, 1)
    elif 'DAY' in interval:
        padded = datetime(end.year + 1, 1, 1)
    elif 'HOUR' in interval or 'MIN' in interval:
        # December rolls over into January of the next year.
        if end.month != 12:
            padded = datetime(end.year, end.month + 1, 1)
        else:
            padded = datetime(end.year + 1, 1, 1)
    else:
        padded = end + timedelta(days=1)
    return padded.strftime(DATE_FMT_STR).upper()
278281
279282 def _get_istat_for_zrrtsxd (self , istat ):
280283 """
@@ -352,9 +355,8 @@ def read_rts(self, pathname, startDateStr=None, endDateStr=None):
352355 endDateStr = edate .strip ()
353356 endDateStr = self ._pad_to_end_of_block (
354357 endDateStr , interval )
355- nvals = self .num_values_in_interval (
356- startDateStr , endDateStr , interval )
357- sdate = pd .to_datetime (startDateStr )
358+ nvals = self .num_values_in_interval (startDateStr , endDateStr , interval )
359+ sdate = parse (startDateStr )
358360 cdate = sdate .date ().strftime ('%d%b%Y' ).upper ()
359361 ctime = '' .join (sdate .time ().isoformat ().split (':' )[:2 ])
360362 # PERF: could be np.empty if all initialized
@@ -369,9 +371,9 @@ def read_rts(self, pathname, startDateStr=None, endDateStr=None):
369371 # FIXME: deal with non-zero iofset for period data,i.e. else part of if stmt below
370372 freqoffset = DSSFile .EPART_FREQ_MAP [interval ]
371373 if ctype .startswith ('INST' ):
372- startDateWithOffset = pd . to_datetime (startDateStr )
374+ startDateWithOffset = parse (startDateStr )
373375 if iofset != 0 :
374- startDateWithOffset = pd . to_datetime (startDateStr )- freqoffset + pd . to_timedelta ( '%dT' % iofset )
376+ startDateWithOffset = parse (startDateStr )- freqoffset + timedelta ( minutes = iofset )
375377 dindex = pd .date_range (
376378 startDateWithOffset , periods = nvals , freq = freqoffset )
377379 else :
@@ -430,10 +432,12 @@ def read_its(self, pathname, startDateStr=None, endDateStr=None, guess_vals_per_
430432 if startDateStr == None or endDateStr == None :
431433 raise Exception (
432434 "Either pathname D PART contains timewindow or specify in startDateStr and endDateStr for this call" )
433- startDateStr = (pd .to_datetime (startDateStr ) -
434- pd .offsets .YearBegin (0 )).strftime ('%d%b%Y' ).upper ()
435- endDateStr = (pd .to_datetime (endDateStr ) +
436- pd .offsets .YearBegin (0 )).strftime ('%d%b%Y' ).upper ()
435+ nsdate = parse (startDateStr )
436+ nsbdate = datetime (nsdate .year ,1 ,1 )
437+ nedate = parse (endDateStr )
438+ nebdate = datetime (nedate .year ,1 ,1 )
439+ startDateStr = nsbdate .strftime (DATE_FMT_STR )
440+ endDateStr = nebdate .strftime (DATE_FMT_STR )
437441 parts [4 ] = startDateStr + " - " + endDateStr
438442 else :
439443 tw = list (map (lambda x : x .strip (), parts [4 ].split ('-' )))
@@ -443,8 +447,7 @@ def read_its(self, pathname, startDateStr=None, endDateStr=None, guess_vals_per_
443447 jule , istat = pyheclib .hec_datjul (endDateStr )
444448 ietime = istime = 0
445449 # guess how many values to be read based on e part approximation
446- ktvals = DSSFile ._number_between (startDateStr , endDateStr ,
447- np .timedelta64 (1 , DSSFile ._get_timedelta_unit (epart )))
450+ ktvals = DSSFile ._number_between (startDateStr , endDateStr , DSSFile ._get_timedelta_for_interval (epart ))
448451 ktvals = guess_vals_per_block * int (ktvals )
449452 kdvals = ktvals
450453 itimes = np .zeros (ktvals , 'i' )
@@ -456,9 +459,8 @@ def read_its(self, pathname, startDateStr=None, endDateStr=None, guess_vals_per_
456459 if nvals == ktvals :
457460 raise Exception (
458461 "More values than guessed! %d. Call with guess_vals_per_block > 10000 " % ktvals )
459- base_date = pd .to_datetime ('31DEC1899' )+ pd .to_timedelta (ibdate , 'D' )
460- df = pd .DataFrame (dvalues [:nvals ], index = pd .to_timedelta (
461- itimes [:nvals ], unit = 'm' )+ base_date , columns = [pathname ])
462+ base_date = parse ('31DEC1899' )+ timedelta (days = ibdate )
463+ df = pd .DataFrame (dvalues [:nvals ], index = base_date + DSSFile .timedelta_minutes (itimes [:nvals ]), columns = [pathname ])
462464 return df , cunits .strip (), ctype .strip ()
463465 # return nvals, dvalues, itimes, base_date, cunits, ctype
464466
@@ -498,7 +500,7 @@ def write_its(self, pathname, df, cunits, ctype, interval=None):
498500 jule , istat = pyheclib .hec_datjul (endDateStr )
499501 ietime = istime = 0
500502 pathname = "/" .join (parts )
501- itimes = df .index - pd . to_datetime (startDateStr )
503+ itimes = df .index - parse (startDateStr )
502504 itimes = itimes .total_seconds ()/ 60 # time in minutes since base date juls
503505 itimes = itimes .values .astype ('i' ) # conver to integer numpy
504506 inflag = 1 # replace data (merging should be done in memory)
0 commit comments