@@ -98,12 +98,12 @@ def _split_generators(self, dl_manager):
9898 'file_path_srrs2' : urllib .parse .urljoin (BASE_URL , 'srrs2.dat' ),
9999 'file_path_cty' : urllib .parse .urljoin (BASE_URL , 'cty.dat' ),
100100 })
101- return [
102- tfds . core . SplitGenerator (
103- name = tfds . Split . TRAIN ,
104- gen_kwargs = paths ,
101+ return {
102+ 'train' : self . _generate_examples (
103+ file_path_srrs2 = paths [ 'file_path_srrs2' ] ,
104+ file_path_cty = paths [ 'file_path_cty' ] ,
105105 ),
106- ]
106+ }
107107
108108 def _generate_examples (self , file_path_srrs2 , file_path_cty ):
109109 """Yields examples."""
@@ -127,9 +127,13 @@ def _generate_examples(self, file_path_srrs2, file_path_cty):
127127 df = df .drop_duplicates (subset = 'idnum' )
128128 df .drop ('fips' , axis = 1 , inplace = True )
129129
130- df ['wave' ].replace ({' .' : '-1' }, inplace = True )
131- df ['rep' ].replace ({' .' : '-1' }, inplace = True )
132- df ['zip' ].replace ({' ' : '-1' }, inplace = True )
130+ # The raw data pads missing values with whitespace (e.g., " ." or " "). Cast
131+ # to string, strip the padding, and assign the result back to the column:
132+ # calling `replace(..., inplace=True)` on `df[col]` can act on a temporary
133+ # copy and silently leave `df` unchanged.
134+ df ['wave' ] = df ['wave' ].astype (str ).str .strip ().replace ('.' , '-1' )
135+ df ['rep' ] = df ['rep' ].astype (str ).str .strip ().replace ('.' , '-1' )
136+ df ['zip' ] = df ['zip' ].astype (str ).str .strip ().replace ('' , '-1' )
133137
134138 for i , (_ , row ) in enumerate (df .iterrows ()):
135139 radon_val = row .pop ('activity' )
0 commit comments