11import duckdb
2- import tempfile
32import os
43import pandas ._testing as tm
54import datetime
109
1110class TestToCSV (object ):
1211 @pytest .mark .parametrize ('pandas' , [NumpyPandas (), ArrowPandas ()])
13- def test_basic_to_csv (self , pandas ):
14- temp_file_name = os . path . join ( tempfile . mkdtemp (), next ( tempfile . _get_candidate_names ()) )
12+ def test_basic_to_csv (self , pandas , tmp_path , default_con ):
13+ temp_file_name = str ( tmp_path / "test.csv" )
1514 df = pandas .DataFrame ({'a' : [5 , 3 , 23 , 2 ], 'b' : [45 , 234 , 234 , 2 ]})
16- rel = duckdb .from_df (df )
15+ rel = default_con .from_df (df )
1716
1817 rel .to_csv (temp_file_name )
1918
20- csv_rel = duckdb .read_csv (temp_file_name )
19+ csv_rel = default_con .read_csv (temp_file_name )
2120 assert rel .execute ().fetchall () == csv_rel .execute ().fetchall ()
2221
2322 @pytest .mark .parametrize ('pandas' , [NumpyPandas (), ArrowPandas ()])
24- def test_to_csv_sep (self , pandas ):
25- temp_file_name = os . path . join ( tempfile . mkdtemp (), next ( tempfile . _get_candidate_names ()) )
23+ def test_to_csv_sep (self , pandas , tmp_path , default_con ):
24+ temp_file_name = str ( tmp_path / "test.csv" )
2625 df = pandas .DataFrame ({'a' : [5 , 3 , 23 , 2 ], 'b' : [45 , 234 , 234 , 2 ]})
27- rel = duckdb .from_df (df )
26+ rel = default_con .from_df (df )
2827
2928 rel .to_csv (temp_file_name , sep = ',' )
3029
31- csv_rel = duckdb .read_csv (temp_file_name , sep = ',' )
30+ csv_rel = default_con .read_csv (temp_file_name , sep = ',' )
3231 assert rel .execute ().fetchall () == csv_rel .execute ().fetchall ()
3332
3433 @pytest .mark .parametrize ('pandas' , [NumpyPandas (), ArrowPandas ()])
35- def test_to_csv_na_rep (self , pandas ):
36- temp_file_name = os . path . join ( tempfile . mkdtemp (), next ( tempfile . _get_candidate_names ()) )
34+ def test_to_csv_na_rep (self , pandas , tmp_path , default_con ):
35+ temp_file_name = str ( tmp_path / "test.csv" )
3736 df = pandas .DataFrame ({'a' : [5 , None , 23 , 2 ], 'b' : [45 , 234 , 234 , 2 ]})
38- rel = duckdb .from_df (df )
37+ rel = default_con .from_df (df )
3938
4039 rel .to_csv (temp_file_name , na_rep = "test" )
4140
42- csv_rel = duckdb .read_csv (temp_file_name , na_values = "test" )
41+ csv_rel = default_con .read_csv (temp_file_name , na_values = "test" )
4342 assert rel .execute ().fetchall () == csv_rel .execute ().fetchall ()
4443
4544 @pytest .mark .parametrize ('pandas' , [NumpyPandas (), ArrowPandas ()])
46- def test_to_csv_header (self , pandas ):
47- temp_file_name = os . path . join ( tempfile . mkdtemp (), next ( tempfile . _get_candidate_names ()) )
45+ def test_to_csv_header (self , pandas , tmp_path , default_con ):
46+ temp_file_name = str ( tmp_path / "test.csv" )
4847 df = pandas .DataFrame ({'a' : [5 , None , 23 , 2 ], 'b' : [45 , 234 , 234 , 2 ]})
49- rel = duckdb .from_df (df )
48+ rel = default_con .from_df (df )
5049
5150 rel .to_csv (temp_file_name )
5251
53- csv_rel = duckdb .read_csv (temp_file_name )
52+ csv_rel = default_con .read_csv (temp_file_name )
5453 assert rel .execute ().fetchall () == csv_rel .execute ().fetchall ()
5554
5655 @pytest .mark .parametrize ('pandas' , [NumpyPandas (), ArrowPandas ()])
57- def test_to_csv_quotechar (self , pandas ):
58- temp_file_name = os . path . join ( tempfile . mkdtemp (), next ( tempfile . _get_candidate_names ()) )
56+ def test_to_csv_quotechar (self , pandas , tmp_path , default_con ):
57+ temp_file_name = str ( tmp_path / "test.csv" )
5958 df = pandas .DataFrame ({'a' : ["\' a,b,c\' " , None , "hello" , "bye" ], 'b' : [45 , 234 , 234 , 2 ]})
60- rel = duckdb .from_df (df )
59+ rel = default_con .from_df (df )
6160
6261 rel .to_csv (temp_file_name , quotechar = '\' ' , sep = ',' )
6362
64- csv_rel = duckdb .read_csv (temp_file_name , sep = ',' , quotechar = '\' ' )
63+ csv_rel = default_con .read_csv (temp_file_name , sep = ',' , quotechar = '\' ' )
6564 assert rel .execute ().fetchall () == csv_rel .execute ().fetchall ()
6665
6766 @pytest .mark .parametrize ('pandas' , [NumpyPandas (), ArrowPandas ()])
68- def test_to_csv_escapechar (self , pandas ):
69- temp_file_name = os . path . join ( tempfile . mkdtemp (), next ( tempfile . _get_candidate_names ()) )
67+ def test_to_csv_escapechar (self , pandas , tmp_path , default_con ):
68+ temp_file_name = str ( tmp_path / "test.csv" )
7069 df = pandas .DataFrame (
7170 {
7271 "c_bool" : [True , False ],
@@ -75,97 +74,102 @@ def test_to_csv_escapechar(self, pandas):
7574 "c_string" : ["a" , "b,c" ],
7675 }
7776 )
78- rel = duckdb .from_df (df )
77+ rel = default_con .from_df (df )
7978 rel .to_csv (temp_file_name , quotechar = '"' , escapechar = '!' )
80- csv_rel = duckdb .read_csv (temp_file_name , quotechar = '"' , escapechar = '!' )
79+ csv_rel = default_con .read_csv (temp_file_name , quotechar = '"' , escapechar = '!' )
8180 assert rel .execute ().fetchall () == csv_rel .execute ().fetchall ()
8281
8382 @pytest .mark .parametrize ('pandas' , [NumpyPandas (), ArrowPandas ()])
84- def test_to_csv_date_format (self , pandas ):
85- temp_file_name = os . path . join ( tempfile . mkdtemp (), next ( tempfile . _get_candidate_names ()) )
83+ def test_to_csv_date_format (self , pandas , tmp_path , default_con ):
84+ temp_file_name = str ( tmp_path / "test.csv" )
8685 df = pandas .DataFrame (getTimeSeriesData ())
8786 dt_index = df .index
8887 df = pandas .DataFrame ({"A" : dt_index , "B" : dt_index .shift (1 )}, index = dt_index )
89- rel = duckdb .from_df (df )
88+ rel = default_con .from_df (df )
9089 rel .to_csv (temp_file_name , date_format = "%Y%m%d" )
9190
92- csv_rel = duckdb .read_csv (temp_file_name , date_format = "%Y%m%d" )
91+ csv_rel = default_con .read_csv (temp_file_name , date_format = "%Y%m%d" )
9392
9493 assert rel .execute ().fetchall () == csv_rel .execute ().fetchall ()
9594
9695 @pytest .mark .parametrize ('pandas' , [NumpyPandas (), ArrowPandas ()])
97- def test_to_csv_timestamp_format (self , pandas ):
98- temp_file_name = os . path . join ( tempfile . mkdtemp (), next ( tempfile . _get_candidate_names ()) )
96+ def test_to_csv_timestamp_format (self , pandas , tmp_path , default_con ):
97+ temp_file_name = str ( tmp_path / "test.csv" )
9998 data = [datetime .time (hour = 23 , minute = 1 , second = 34 , microsecond = 234345 )]
10099 df = pandas .DataFrame ({'0' : pandas .Series (data = data , dtype = 'object' )})
101- rel = duckdb .from_df (df )
100+ rel = default_con .from_df (df )
102101 rel .to_csv (temp_file_name , timestamp_format = '%m/%d/%Y' )
103102
104- csv_rel = duckdb .read_csv (temp_file_name , timestamp_format = '%m/%d/%Y' )
103+ csv_rel = default_con .read_csv (temp_file_name , timestamp_format = '%m/%d/%Y' )
105104
106105 assert rel .execute ().fetchall () == csv_rel .execute ().fetchall ()
107106
108107 @pytest .mark .parametrize ('pandas' , [NumpyPandas (), ArrowPandas ()])
109- def test_to_csv_quoting_off (self , pandas ):
110- temp_file_name = os . path . join ( tempfile . mkdtemp (), next ( tempfile . _get_candidate_names ()) )
108+ def test_to_csv_quoting_off (self , pandas , tmp_path , default_con ):
109+ temp_file_name = str ( tmp_path / "test.csv" )
111110 df = pandas .DataFrame ({'a' : ['string1' , 'string2' , 'string3' ]})
112- rel = duckdb .from_df (df )
111+ rel = default_con .from_df (df )
113112 rel .to_csv (temp_file_name , quoting = None )
114113
115- csv_rel = duckdb .read_csv (temp_file_name )
114+ csv_rel = default_con .read_csv (temp_file_name )
116115 assert rel .execute ().fetchall () == csv_rel .execute ().fetchall ()
117116
118117 @pytest .mark .parametrize ('pandas' , [NumpyPandas (), ArrowPandas ()])
119- def test_to_csv_quoting_on (self , pandas ):
120- temp_file_name = os . path . join ( tempfile . mkdtemp (), next ( tempfile . _get_candidate_names ()) )
118+ def test_to_csv_quoting_on (self , pandas , tmp_path , default_con ):
119+ temp_file_name = str ( tmp_path / "test.csv" )
121120 df = pandas .DataFrame ({'a' : ['string1' , 'string2' , 'string3' ]})
122- rel = duckdb .from_df (df )
121+ rel = default_con .from_df (df )
123122 rel .to_csv (temp_file_name , quoting = "force" )
124123
125- csv_rel = duckdb .read_csv (temp_file_name )
124+ csv_rel = default_con .read_csv (temp_file_name )
126125 assert rel .execute ().fetchall () == csv_rel .execute ().fetchall ()
127126
128127 @pytest .mark .parametrize ('pandas' , [NumpyPandas (), ArrowPandas ()])
129- def test_to_csv_quoting_quote_all (self , pandas ):
130- temp_file_name = os .path .join (tempfile .mkdtemp (), next (tempfile ._get_candidate_names ()))
128+ def test_to_csv_quoting_quote_all (self , pandas , tmp_path , default_con ):
129+ temp_file_name = str (tmp_path / "test.csv" )
130+
131131 df = pandas .DataFrame ({'a' : ['string1' , 'string2' , 'string3' ]})
132- rel = duckdb .from_df (df )
132+ rel = default_con .from_df (df )
133133 rel .to_csv (temp_file_name , quoting = csv .QUOTE_ALL )
134134
135- csv_rel = duckdb .read_csv (temp_file_name )
135+ csv_rel = default_con .read_csv (temp_file_name )
136136 assert rel .execute ().fetchall () == csv_rel .execute ().fetchall ()
137137
138138 @pytest .mark .parametrize ('pandas' , [NumpyPandas (), ArrowPandas ()])
139- def test_to_csv_encoding_incorrect (self , pandas ):
140- temp_file_name = os .path .join (tempfile .mkdtemp (), next (tempfile ._get_candidate_names ()))
139+ def test_to_csv_encoding_incorrect (self , pandas , tmp_path , default_con ):
140+ temp_file_name = str (tmp_path / "test.csv" )
141+
141142 df = pandas .DataFrame ({'a' : ['string1' , 'string2' , 'string3' ]})
142- rel = duckdb .from_df (df )
143+ rel = default_con .from_df (df )
143144 with pytest .raises (
144145 duckdb .InvalidInputException , match = "Invalid Input Error: The only supported encoding option is 'UTF8"
145146 ):
146147 rel .to_csv (temp_file_name , encoding = "nope" )
147148
148149 @pytest .mark .parametrize ('pandas' , [NumpyPandas (), ArrowPandas ()])
149- def test_to_csv_encoding_correct (self , pandas ):
150- temp_file_name = os .path .join (tempfile .mkdtemp (), next (tempfile ._get_candidate_names ()))
150+ def test_to_csv_encoding_correct (self , pandas , tmp_path , default_con ):
151+ temp_file_name = str (tmp_path / "test.csv" )
152+
151153 df = pandas .DataFrame ({'a' : ['string1' , 'string2' , 'string3' ]})
152- rel = duckdb .from_df (df )
154+ rel = default_con .from_df (df )
153155 rel .to_csv (temp_file_name , encoding = "UTF-8" )
154- csv_rel = duckdb .read_csv (temp_file_name )
156+ csv_rel = default_con .read_csv (temp_file_name )
155157 assert rel .execute ().fetchall () == csv_rel .execute ().fetchall ()
156158
157159 @pytest .mark .parametrize ('pandas' , [NumpyPandas (), ArrowPandas ()])
158- def test_compression_gzip (self , pandas ):
159- temp_file_name = os .path .join (tempfile .mkdtemp (), next (tempfile ._get_candidate_names ()))
160+ def test_compression_gzip (self , pandas , tmp_path , default_con ):
161+ temp_file_name = str (tmp_path / "test.csv" )
162+
160163 df = pandas .DataFrame ({'a' : ['string1' , 'string2' , 'string3' ]})
161- rel = duckdb .from_df (df )
164+ rel = default_con .from_df (df )
162165 rel .to_csv (temp_file_name , compression = "gzip" )
163- csv_rel = duckdb .read_csv (temp_file_name , compression = "gzip" )
166+ csv_rel = default_con .read_csv (temp_file_name , compression = "gzip" )
164167 assert rel .execute ().fetchall () == csv_rel .execute ().fetchall ()
165168
166169 @pytest .mark .parametrize ('pandas' , [NumpyPandas (), ArrowPandas ()])
167- def test_to_csv_partition (self , pandas ):
168- temp_file_name = os .path .join (tempfile .mkdtemp (), next (tempfile ._get_candidate_names ()))
170+ def test_to_csv_partition (self , pandas , tmp_path , default_con ):
171+ temp_file_name = str (tmp_path / "test.csv" )
172+
169173 df = pandas .DataFrame (
170174 {
171175 "c_category" : ['a' , 'a' , 'b' , 'b' ],
@@ -175,9 +179,9 @@ def test_to_csv_partition(self, pandas):
175179 "c_string" : ["a" , "b,c" , "e" , "f" ],
176180 }
177181 )
178- rel = duckdb .from_df (df )
182+ rel = default_con .from_df (df )
179183 rel .to_csv (temp_file_name , header = True , partition_by = ["c_category" ])
180- csv_rel = duckdb .sql (
184+ csv_rel = default_con .sql (
181185 f'''FROM read_csv_auto('{ temp_file_name } /*/*.csv', hive_partitioning=TRUE, header=TRUE);'''
182186 )
183187 expected = [
@@ -190,8 +194,9 @@ def test_to_csv_partition(self, pandas):
190194 assert csv_rel .execute ().fetchall () == expected
191195
192196 @pytest .mark .parametrize ('pandas' , [NumpyPandas (), ArrowPandas ()])
193- def test_to_csv_partition_with_columns_written (self , pandas ):
194- temp_file_name = os .path .join (tempfile .mkdtemp (), next (tempfile ._get_candidate_names ()))
197+ def test_to_csv_partition_with_columns_written (self , pandas , tmp_path , default_con ):
198+ temp_file_name = str (tmp_path / "test.csv" )
199+
195200 df = pandas .DataFrame (
196201 {
197202 "c_category" : ['a' , 'a' , 'b' , 'b' ],
@@ -201,17 +206,18 @@ def test_to_csv_partition_with_columns_written(self, pandas):
201206 "c_string" : ["a" , "b,c" , "e" , "f" ],
202207 }
203208 )
204- rel = duckdb .from_df (df )
205- res = duckdb .sql ("FROM rel order by all" )
209+ rel = default_con .from_df (df )
210+ res = default_con .sql ("FROM rel order by all" )
206211 rel .to_csv (temp_file_name , header = True , partition_by = ["c_category" ], write_partition_columns = True )
207- csv_rel = duckdb .sql (
212+ csv_rel = default_con .sql (
208213 f'''FROM read_csv_auto('{ temp_file_name } /*/*.csv', hive_partitioning=TRUE, header=TRUE) order by all;'''
209214 )
210215 assert res .execute ().fetchall () == csv_rel .execute ().fetchall ()
211216
212217 @pytest .mark .parametrize ('pandas' , [NumpyPandas (), ArrowPandas ()])
213- def test_to_csv_overwrite (self , pandas ):
214- temp_file_name = os .path .join (tempfile .mkdtemp (), next (tempfile ._get_candidate_names ()))
218+ def test_to_csv_overwrite (self , pandas , tmp_path , default_con ):
219+ temp_file_name = str (tmp_path / "test.csv" )
220+
215221 df = pandas .DataFrame (
216222 {
217223 "c_category_1" : ['a' , 'a' , 'b' , 'b' ],
@@ -222,10 +228,10 @@ def test_to_csv_overwrite(self, pandas):
222228 "c_string" : ["a" , "b,c" , "e" , "f" ],
223229 }
224230 )
225- rel = duckdb .from_df (df )
231+ rel = default_con .from_df (df )
226232 rel .to_csv (temp_file_name , header = True , partition_by = ["c_category_1" ]) # csv to be overwritten
227233 rel .to_csv (temp_file_name , header = True , partition_by = ["c_category_1" ], overwrite = True )
228- csv_rel = duckdb .sql (
234+ csv_rel = default_con .sql (
229235 f'''FROM read_csv_auto('{ temp_file_name } /*/*.csv', hive_partitioning=TRUE, header=TRUE);'''
230236 )
231237 # When partition columns are read from directory names, column order become different from original
@@ -238,8 +244,9 @@ def test_to_csv_overwrite(self, pandas):
238244 assert csv_rel .execute ().fetchall () == expected
239245
240246 @pytest .mark .parametrize ('pandas' , [NumpyPandas (), ArrowPandas ()])
241- def test_to_csv_overwrite_with_columns_written (self , pandas ):
242- temp_file_name = os .path .join (tempfile .mkdtemp (), next (tempfile ._get_candidate_names ()))
247+ def test_to_csv_overwrite_with_columns_written (self , pandas , tmp_path , default_con ):
248+ temp_file_name = str (tmp_path / "test.csv" )
249+
243250 df = pandas .DataFrame (
244251 {
245252 "c_category_1" : ['a' , 'a' , 'b' , 'b' ],
@@ -250,22 +257,23 @@ def test_to_csv_overwrite_with_columns_written(self, pandas):
250257 "c_string" : ["a" , "b,c" , "e" , "f" ],
251258 }
252259 )
253- rel = duckdb .from_df (df )
260+ rel = default_con .from_df (df )
254261 rel .to_csv (
255262 temp_file_name , header = True , partition_by = ["c_category_1" ], write_partition_columns = True
256263 ) # csv to be overwritten
257264 rel .to_csv (
258265 temp_file_name , header = True , partition_by = ["c_category_1" ], overwrite = True , write_partition_columns = True
259266 )
260- csv_rel = duckdb .sql (
267+ csv_rel = default_con .sql (
261268 f'''FROM read_csv_auto('{ temp_file_name } /*/*.csv', hive_partitioning=TRUE, header=TRUE) order by all;'''
262269 )
263- res = duckdb .sql ("FROM rel order by all" )
270+ res = default_con .sql ("FROM rel order by all" )
264271 assert res .execute ().fetchall () == csv_rel .execute ().fetchall ()
265272
266273 @pytest .mark .parametrize ('pandas' , [NumpyPandas (), ArrowPandas ()])
267- def test_to_csv_overwrite_not_enabled (self , pandas ):
268- temp_file_name = os .path .join (tempfile .mkdtemp (), next (tempfile ._get_candidate_names ()))
274+ def test_to_csv_overwrite_not_enabled (self , pandas , tmp_path , default_con ):
275+ temp_file_name = str (tmp_path / "test.csv" )
276+
269277 df = pandas .DataFrame (
270278 {
271279 "c_category_1" : ['a' , 'a' , 'b' , 'b' ],
@@ -276,15 +284,16 @@ def test_to_csv_overwrite_not_enabled(self, pandas):
276284 "c_string" : ["a" , "b,c" , "e" , "f" ],
277285 }
278286 )
279- rel = duckdb .from_df (df )
287+ rel = default_con .from_df (df )
280288 rel .to_csv (temp_file_name , header = True , partition_by = ["c_category_1" ])
281289 with pytest .raises (duckdb .IOException , match = "OVERWRITE" ):
282290 rel .to_csv (temp_file_name , header = True , partition_by = ["c_category_1" ])
283291
284292 @pytest .mark .parametrize ('pandas' , [NumpyPandas (), ArrowPandas ()])
285- def test_to_csv_per_thread_output (self , pandas ):
286- temp_file_name = os .path .join (tempfile .mkdtemp (), next (tempfile ._get_candidate_names ()))
287- num_threads = duckdb .sql ("select current_setting('threads')" ).fetchone ()[0 ]
293+ def test_to_csv_per_thread_output (self , pandas , tmp_path , default_con ):
294+ temp_file_name = str (tmp_path / "test.csv" )
295+
296+ num_threads = default_con .sql ("select current_setting('threads')" ).fetchone ()[0 ]
288297 print ('num_threads:' , num_threads )
289298 df = pandas .DataFrame (
290299 {
@@ -295,14 +304,15 @@ def test_to_csv_per_thread_output(self, pandas):
295304 "c_string" : ["a" , "b,c" , "e" , "f" ],
296305 }
297306 )
298- rel = duckdb .from_df (df )
307+ rel = default_con .from_df (df )
299308 rel .to_csv (temp_file_name , header = True , per_thread_output = True )
300- csv_rel = duckdb .read_csv (f'{ temp_file_name } /*.csv' , header = True )
309+ csv_rel = default_con .read_csv (f'{ temp_file_name } /*.csv' , header = True )
301310 assert rel .execute ().fetchall () == csv_rel .execute ().fetchall ()
302311
303312 @pytest .mark .parametrize ('pandas' , [NumpyPandas (), ArrowPandas ()])
304- def test_to_csv_use_tmp_file (self , pandas ):
305- temp_file_name = os .path .join (tempfile .mkdtemp (), next (tempfile ._get_candidate_names ()))
313+ def test_to_csv_use_tmp_file (self , pandas , tmp_path , default_con ):
314+ temp_file_name = str (tmp_path / "test.csv" )
315+
306316 df = pandas .DataFrame (
307317 {
308318 "c_category_1" : ['a' , 'a' , 'b' , 'b' ],
@@ -313,8 +323,8 @@ def test_to_csv_use_tmp_file(self, pandas):
313323 "c_string" : ["a" , "b,c" , "e" , "f" ],
314324 }
315325 )
316- rel = duckdb .from_df (df )
326+ rel = default_con .from_df (df )
317327 rel .to_csv (temp_file_name , header = True ) # csv to be overwritten
318328 rel .to_csv (temp_file_name , header = True , use_tmp_file = True )
319- csv_rel = duckdb .read_csv (temp_file_name , header = True )
329+ csv_rel = default_con .read_csv (temp_file_name , header = True )
320330 assert rel .execute ().fetchall () == csv_rel .execute ().fetchall ()
0 commit comments