Skip to content

Commit b87a2c2

Browse files
SunsetWolf and Linlang Lv (iSoftStone Information) authored
fix: value error caused by incorrect date format in daily data (#2015)
Co-authored-by: Linlang Lv (iSoftStone Information) <v-llv@microsoft.com>
1 parent 3097dcc commit b87a2c2

1 file changed

Lines changed: 21 additions & 8 deletions

File tree

scripts/data_collector/base.py

Lines changed: 21 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -280,11 +280,20 @@ def __init__(
280280
self._symbol_field_name = symbol_field_name
281281
self._end_date = kwargs.get("end_date", None)
282282
self._max_workers = max_workers
283+
self.interval = kwargs.get("interval", "1d")
283284

284285
self._normalize_obj = normalize_class(
285286
date_field_name=date_field_name, symbol_field_name=symbol_field_name, **kwargs
286287
)
287288

289+
def format_data(self, df: pd.DataFrame):
    """Drop the trailing row of daily data when its date is malformed.

    Only the last row is inspected, and only when ``self.interval`` is
    ``"1d"``. If that row's date does not parse as ``%Y-%m-%d``, the row
    is removed; otherwise the frame is returned untouched.

    Parameters
    ----------
    df : pd.DataFrame
        Raw source data read from a single file.

    Returns
    -------
    pd.DataFrame
        ``df`` itself, or ``df`` without its last row.
    """
    # Nothing to validate for non-daily data or for an empty frame.
    if self.interval != "1d" or df.empty:
        return df

    # Use the configured date column rather than a hard-coded "date", so the
    # check agrees with the rest of the class.
    date_field = self._date_field_name
    if date_field not in df.columns:
        # A missing column is not a malformed last row; do not discard data.
        return df

    try:
        pd.to_datetime(df.iloc[-1][date_field], format="%Y-%m-%d", errors="raise")
    except (ValueError, TypeError, OverflowError):
        # Only a genuine parse failure drops the trailing row.
        return df.iloc[:-1]
    return df
296+
288297
def _executor(self, file_path: Path):
289298
file_path = Path(file_path)
290299

@@ -300,14 +309,18 @@ def _executor(self, file_path: Path):
300309
keep_default_na=False,
301310
na_values={col: symbol_na if col == self._symbol_field_name else default_na for col in columns},
302311
)
303-
304-
# NOTE: It has been reported that there may be some problems here, and the specific issues will be dealt with when they are identified.
305-
df = self._normalize_obj.normalize(df)
306-
if df is not None and not df.empty:
307-
if self._end_date is not None:
308-
_mask = pd.to_datetime(df[self._date_field_name]) <= pd.Timestamp(self._end_date)
309-
df = df[_mask]
310-
df.to_csv(self._target_dir.joinpath(file_path.name), index=False)
312+
df = self.format_data(df=df)
313+
314+
if not df.empty:
315+
# NOTE: It has been reported that there may be some problems here, and the specific issues will be dealt with when they are identified.
316+
df = self._normalize_obj.normalize(df)
317+
if df is not None and not df.empty:
318+
if self._end_date is not None:
319+
_mask = pd.to_datetime(df[self._date_field_name]) <= pd.Timestamp(self._end_date)
320+
df = df[_mask]
321+
df.to_csv(self._target_dir.joinpath(file_path.name), index=False)
322+
else:
323+
logger.warning(f"{file_path.stem} source data is empty and will not undergo normalization processing.")
311324

312325
def normalize(self):
313326
logger.info("normalize data......")

0 commit comments

Comments
 (0)