Skip to content

Commit 3922560

Browse files
committed
tests: use a tmp_path fixture to isolate test data
1 parent 5bac379 commit 3922560

3 files changed

Lines changed: 113 additions & 114 deletions

File tree

tests/fast/api/test_to_csv.py

Lines changed: 96 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import duckdb
2-
import tempfile
32
import os
43
import pandas._testing as tm
54
import datetime
@@ -10,63 +9,63 @@
109

1110
class TestToCSV(object):
1211
@pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()])
13-
def test_basic_to_csv(self, pandas):
14-
temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names()))
12+
def test_basic_to_csv(self, pandas, tmp_path, default_con):
13+
temp_file_name = str(tmp_path / "test.csv")
1514
df = pandas.DataFrame({'a': [5, 3, 23, 2], 'b': [45, 234, 234, 2]})
16-
rel = duckdb.from_df(df)
15+
rel = default_con.from_df(df)
1716

1817
rel.to_csv(temp_file_name)
1918

20-
csv_rel = duckdb.read_csv(temp_file_name)
19+
csv_rel = default_con.read_csv(temp_file_name)
2120
assert rel.execute().fetchall() == csv_rel.execute().fetchall()
2221

2322
@pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()])
24-
def test_to_csv_sep(self, pandas):
25-
temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names()))
23+
def test_to_csv_sep(self, pandas, tmp_path, default_con):
24+
temp_file_name = str(tmp_path / "test.csv")
2625
df = pandas.DataFrame({'a': [5, 3, 23, 2], 'b': [45, 234, 234, 2]})
27-
rel = duckdb.from_df(df)
26+
rel = default_con.from_df(df)
2827

2928
rel.to_csv(temp_file_name, sep=',')
3029

31-
csv_rel = duckdb.read_csv(temp_file_name, sep=',')
30+
csv_rel = default_con.read_csv(temp_file_name, sep=',')
3231
assert rel.execute().fetchall() == csv_rel.execute().fetchall()
3332

3433
@pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()])
35-
def test_to_csv_na_rep(self, pandas):
36-
temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names()))
34+
def test_to_csv_na_rep(self, pandas, tmp_path, default_con):
35+
temp_file_name = str(tmp_path / "test.csv")
3736
df = pandas.DataFrame({'a': [5, None, 23, 2], 'b': [45, 234, 234, 2]})
38-
rel = duckdb.from_df(df)
37+
rel = default_con.from_df(df)
3938

4039
rel.to_csv(temp_file_name, na_rep="test")
4140

42-
csv_rel = duckdb.read_csv(temp_file_name, na_values="test")
41+
csv_rel = default_con.read_csv(temp_file_name, na_values="test")
4342
assert rel.execute().fetchall() == csv_rel.execute().fetchall()
4443

4544
@pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()])
46-
def test_to_csv_header(self, pandas):
47-
temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names()))
45+
def test_to_csv_header(self, pandas, tmp_path, default_con):
46+
temp_file_name = str(tmp_path / "test.csv")
4847
df = pandas.DataFrame({'a': [5, None, 23, 2], 'b': [45, 234, 234, 2]})
49-
rel = duckdb.from_df(df)
48+
rel = default_con.from_df(df)
5049

5150
rel.to_csv(temp_file_name)
5251

53-
csv_rel = duckdb.read_csv(temp_file_name)
52+
csv_rel = default_con.read_csv(temp_file_name)
5453
assert rel.execute().fetchall() == csv_rel.execute().fetchall()
5554

5655
@pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()])
57-
def test_to_csv_quotechar(self, pandas):
58-
temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names()))
56+
def test_to_csv_quotechar(self, pandas, tmp_path, default_con):
57+
temp_file_name = str(tmp_path / "test.csv")
5958
df = pandas.DataFrame({'a': ["\'a,b,c\'", None, "hello", "bye"], 'b': [45, 234, 234, 2]})
60-
rel = duckdb.from_df(df)
59+
rel = default_con.from_df(df)
6160

6261
rel.to_csv(temp_file_name, quotechar='\'', sep=',')
6362

64-
csv_rel = duckdb.read_csv(temp_file_name, sep=',', quotechar='\'')
63+
csv_rel = default_con.read_csv(temp_file_name, sep=',', quotechar='\'')
6564
assert rel.execute().fetchall() == csv_rel.execute().fetchall()
6665

6766
@pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()])
68-
def test_to_csv_escapechar(self, pandas):
69-
temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names()))
67+
def test_to_csv_escapechar(self, pandas, tmp_path, default_con):
68+
temp_file_name = str(tmp_path / "test.csv")
7069
df = pandas.DataFrame(
7170
{
7271
"c_bool": [True, False],
@@ -75,97 +74,102 @@ def test_to_csv_escapechar(self, pandas):
7574
"c_string": ["a", "b,c"],
7675
}
7776
)
78-
rel = duckdb.from_df(df)
77+
rel = default_con.from_df(df)
7978
rel.to_csv(temp_file_name, quotechar='"', escapechar='!')
80-
csv_rel = duckdb.read_csv(temp_file_name, quotechar='"', escapechar='!')
79+
csv_rel = default_con.read_csv(temp_file_name, quotechar='"', escapechar='!')
8180
assert rel.execute().fetchall() == csv_rel.execute().fetchall()
8281

8382
@pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()])
84-
def test_to_csv_date_format(self, pandas):
85-
temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names()))
83+
def test_to_csv_date_format(self, pandas, tmp_path, default_con):
84+
temp_file_name = str(tmp_path / "test.csv")
8685
df = pandas.DataFrame(getTimeSeriesData())
8786
dt_index = df.index
8887
df = pandas.DataFrame({"A": dt_index, "B": dt_index.shift(1)}, index=dt_index)
89-
rel = duckdb.from_df(df)
88+
rel = default_con.from_df(df)
9089
rel.to_csv(temp_file_name, date_format="%Y%m%d")
9190

92-
csv_rel = duckdb.read_csv(temp_file_name, date_format="%Y%m%d")
91+
csv_rel = default_con.read_csv(temp_file_name, date_format="%Y%m%d")
9392

9493
assert rel.execute().fetchall() == csv_rel.execute().fetchall()
9594

9695
@pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()])
97-
def test_to_csv_timestamp_format(self, pandas):
98-
temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names()))
96+
def test_to_csv_timestamp_format(self, pandas, tmp_path, default_con):
97+
temp_file_name = str(tmp_path / "test.csv")
9998
data = [datetime.time(hour=23, minute=1, second=34, microsecond=234345)]
10099
df = pandas.DataFrame({'0': pandas.Series(data=data, dtype='object')})
101-
rel = duckdb.from_df(df)
100+
rel = default_con.from_df(df)
102101
rel.to_csv(temp_file_name, timestamp_format='%m/%d/%Y')
103102

104-
csv_rel = duckdb.read_csv(temp_file_name, timestamp_format='%m/%d/%Y')
103+
csv_rel = default_con.read_csv(temp_file_name, timestamp_format='%m/%d/%Y')
105104

106105
assert rel.execute().fetchall() == csv_rel.execute().fetchall()
107106

108107
@pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()])
109-
def test_to_csv_quoting_off(self, pandas):
110-
temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names()))
108+
def test_to_csv_quoting_off(self, pandas, tmp_path, default_con):
109+
temp_file_name = str(tmp_path / "test.csv")
111110
df = pandas.DataFrame({'a': ['string1', 'string2', 'string3']})
112-
rel = duckdb.from_df(df)
111+
rel = default_con.from_df(df)
113112
rel.to_csv(temp_file_name, quoting=None)
114113

115-
csv_rel = duckdb.read_csv(temp_file_name)
114+
csv_rel = default_con.read_csv(temp_file_name)
116115
assert rel.execute().fetchall() == csv_rel.execute().fetchall()
117116

118117
@pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()])
119-
def test_to_csv_quoting_on(self, pandas):
120-
temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names()))
118+
def test_to_csv_quoting_on(self, pandas, tmp_path, default_con):
119+
temp_file_name = str(tmp_path / "test.csv")
121120
df = pandas.DataFrame({'a': ['string1', 'string2', 'string3']})
122-
rel = duckdb.from_df(df)
121+
rel = default_con.from_df(df)
123122
rel.to_csv(temp_file_name, quoting="force")
124123

125-
csv_rel = duckdb.read_csv(temp_file_name)
124+
csv_rel = default_con.read_csv(temp_file_name)
126125
assert rel.execute().fetchall() == csv_rel.execute().fetchall()
127126

128127
@pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()])
129-
def test_to_csv_quoting_quote_all(self, pandas):
130-
temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names()))
128+
def test_to_csv_quoting_quote_all(self, pandas, tmp_path, default_con):
129+
temp_file_name = str(tmp_path / "test.csv")
130+
131131
df = pandas.DataFrame({'a': ['string1', 'string2', 'string3']})
132-
rel = duckdb.from_df(df)
132+
rel = default_con.from_df(df)
133133
rel.to_csv(temp_file_name, quoting=csv.QUOTE_ALL)
134134

135-
csv_rel = duckdb.read_csv(temp_file_name)
135+
csv_rel = default_con.read_csv(temp_file_name)
136136
assert rel.execute().fetchall() == csv_rel.execute().fetchall()
137137

138138
@pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()])
139-
def test_to_csv_encoding_incorrect(self, pandas):
140-
temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names()))
139+
def test_to_csv_encoding_incorrect(self, pandas, tmp_path, default_con):
140+
temp_file_name = str(tmp_path / "test.csv")
141+
141142
df = pandas.DataFrame({'a': ['string1', 'string2', 'string3']})
142-
rel = duckdb.from_df(df)
143+
rel = default_con.from_df(df)
143144
with pytest.raises(
144145
duckdb.InvalidInputException, match="Invalid Input Error: The only supported encoding option is 'UTF8"
145146
):
146147
rel.to_csv(temp_file_name, encoding="nope")
147148

148149
@pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()])
149-
def test_to_csv_encoding_correct(self, pandas):
150-
temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names()))
150+
def test_to_csv_encoding_correct(self, pandas, tmp_path, default_con):
151+
temp_file_name = str(tmp_path / "test.csv")
152+
151153
df = pandas.DataFrame({'a': ['string1', 'string2', 'string3']})
152-
rel = duckdb.from_df(df)
154+
rel = default_con.from_df(df)
153155
rel.to_csv(temp_file_name, encoding="UTF-8")
154-
csv_rel = duckdb.read_csv(temp_file_name)
156+
csv_rel = default_con.read_csv(temp_file_name)
155157
assert rel.execute().fetchall() == csv_rel.execute().fetchall()
156158

157159
@pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()])
158-
def test_compression_gzip(self, pandas):
159-
temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names()))
160+
def test_compression_gzip(self, pandas, tmp_path, default_con):
161+
temp_file_name = str(tmp_path / "test.csv")
162+
160163
df = pandas.DataFrame({'a': ['string1', 'string2', 'string3']})
161-
rel = duckdb.from_df(df)
164+
rel = default_con.from_df(df)
162165
rel.to_csv(temp_file_name, compression="gzip")
163-
csv_rel = duckdb.read_csv(temp_file_name, compression="gzip")
166+
csv_rel = default_con.read_csv(temp_file_name, compression="gzip")
164167
assert rel.execute().fetchall() == csv_rel.execute().fetchall()
165168

166169
@pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()])
167-
def test_to_csv_partition(self, pandas):
168-
temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names()))
170+
def test_to_csv_partition(self, pandas, tmp_path, default_con):
171+
temp_file_name = str(tmp_path / "test.csv")
172+
169173
df = pandas.DataFrame(
170174
{
171175
"c_category": ['a', 'a', 'b', 'b'],
@@ -175,9 +179,9 @@ def test_to_csv_partition(self, pandas):
175179
"c_string": ["a", "b,c", "e", "f"],
176180
}
177181
)
178-
rel = duckdb.from_df(df)
182+
rel = default_con.from_df(df)
179183
rel.to_csv(temp_file_name, header=True, partition_by=["c_category"])
180-
csv_rel = duckdb.sql(
184+
csv_rel = default_con.sql(
181185
f'''FROM read_csv_auto('{temp_file_name}/*/*.csv', hive_partitioning=TRUE, header=TRUE);'''
182186
)
183187
expected = [
@@ -190,8 +194,9 @@ def test_to_csv_partition(self, pandas):
190194
assert csv_rel.execute().fetchall() == expected
191195

192196
@pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()])
193-
def test_to_csv_partition_with_columns_written(self, pandas):
194-
temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names()))
197+
def test_to_csv_partition_with_columns_written(self, pandas, tmp_path, default_con):
198+
temp_file_name = str(tmp_path / "test.csv")
199+
195200
df = pandas.DataFrame(
196201
{
197202
"c_category": ['a', 'a', 'b', 'b'],
@@ -201,17 +206,18 @@ def test_to_csv_partition_with_columns_written(self, pandas):
201206
"c_string": ["a", "b,c", "e", "f"],
202207
}
203208
)
204-
rel = duckdb.from_df(df)
205-
res = duckdb.sql("FROM rel order by all")
209+
rel = default_con.from_df(df)
210+
res = default_con.sql("FROM rel order by all")
206211
rel.to_csv(temp_file_name, header=True, partition_by=["c_category"], write_partition_columns=True)
207-
csv_rel = duckdb.sql(
212+
csv_rel = default_con.sql(
208213
f'''FROM read_csv_auto('{temp_file_name}/*/*.csv', hive_partitioning=TRUE, header=TRUE) order by all;'''
209214
)
210215
assert res.execute().fetchall() == csv_rel.execute().fetchall()
211216

212217
@pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()])
213-
def test_to_csv_overwrite(self, pandas):
214-
temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names()))
218+
def test_to_csv_overwrite(self, pandas, tmp_path, default_con):
219+
temp_file_name = str(tmp_path / "test.csv")
220+
215221
df = pandas.DataFrame(
216222
{
217223
"c_category_1": ['a', 'a', 'b', 'b'],
@@ -222,10 +228,10 @@ def test_to_csv_overwrite(self, pandas):
222228
"c_string": ["a", "b,c", "e", "f"],
223229
}
224230
)
225-
rel = duckdb.from_df(df)
231+
rel = default_con.from_df(df)
226232
rel.to_csv(temp_file_name, header=True, partition_by=["c_category_1"]) # csv to be overwritten
227233
rel.to_csv(temp_file_name, header=True, partition_by=["c_category_1"], overwrite=True)
228-
csv_rel = duckdb.sql(
234+
csv_rel = default_con.sql(
229235
f'''FROM read_csv_auto('{temp_file_name}/*/*.csv', hive_partitioning=TRUE, header=TRUE);'''
230236
)
231237
# When partition columns are read from directory names, the column order becomes different from the original
@@ -238,8 +244,9 @@ def test_to_csv_overwrite(self, pandas):
238244
assert csv_rel.execute().fetchall() == expected
239245

240246
@pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()])
241-
def test_to_csv_overwrite_with_columns_written(self, pandas):
242-
temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names()))
247+
def test_to_csv_overwrite_with_columns_written(self, pandas, tmp_path, default_con):
248+
temp_file_name = str(tmp_path / "test.csv")
249+
243250
df = pandas.DataFrame(
244251
{
245252
"c_category_1": ['a', 'a', 'b', 'b'],
@@ -250,22 +257,23 @@ def test_to_csv_overwrite_with_columns_written(self, pandas):
250257
"c_string": ["a", "b,c", "e", "f"],
251258
}
252259
)
253-
rel = duckdb.from_df(df)
260+
rel = default_con.from_df(df)
254261
rel.to_csv(
255262
temp_file_name, header=True, partition_by=["c_category_1"], write_partition_columns=True
256263
) # csv to be overwritten
257264
rel.to_csv(
258265
temp_file_name, header=True, partition_by=["c_category_1"], overwrite=True, write_partition_columns=True
259266
)
260-
csv_rel = duckdb.sql(
267+
csv_rel = default_con.sql(
261268
f'''FROM read_csv_auto('{temp_file_name}/*/*.csv', hive_partitioning=TRUE, header=TRUE) order by all;'''
262269
)
263-
res = duckdb.sql("FROM rel order by all")
270+
res = default_con.sql("FROM rel order by all")
264271
assert res.execute().fetchall() == csv_rel.execute().fetchall()
265272

266273
@pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()])
267-
def test_to_csv_overwrite_not_enabled(self, pandas):
268-
temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names()))
274+
def test_to_csv_overwrite_not_enabled(self, pandas, tmp_path, default_con):
275+
temp_file_name = str(tmp_path / "test.csv")
276+
269277
df = pandas.DataFrame(
270278
{
271279
"c_category_1": ['a', 'a', 'b', 'b'],
@@ -276,15 +284,16 @@ def test_to_csv_overwrite_not_enabled(self, pandas):
276284
"c_string": ["a", "b,c", "e", "f"],
277285
}
278286
)
279-
rel = duckdb.from_df(df)
287+
rel = default_con.from_df(df)
280288
rel.to_csv(temp_file_name, header=True, partition_by=["c_category_1"])
281289
with pytest.raises(duckdb.IOException, match="OVERWRITE"):
282290
rel.to_csv(temp_file_name, header=True, partition_by=["c_category_1"])
283291

284292
@pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()])
285-
def test_to_csv_per_thread_output(self, pandas):
286-
temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names()))
287-
num_threads = duckdb.sql("select current_setting('threads')").fetchone()[0]
293+
def test_to_csv_per_thread_output(self, pandas, tmp_path, default_con):
294+
temp_file_name = str(tmp_path / "test.csv")
295+
296+
num_threads = default_con.sql("select current_setting('threads')").fetchone()[0]
288297
print('num_threads:', num_threads)
289298
df = pandas.DataFrame(
290299
{
@@ -295,14 +304,15 @@ def test_to_csv_per_thread_output(self, pandas):
295304
"c_string": ["a", "b,c", "e", "f"],
296305
}
297306
)
298-
rel = duckdb.from_df(df)
307+
rel = default_con.from_df(df)
299308
rel.to_csv(temp_file_name, header=True, per_thread_output=True)
300-
csv_rel = duckdb.read_csv(f'{temp_file_name}/*.csv', header=True)
309+
csv_rel = default_con.read_csv(f'{temp_file_name}/*.csv', header=True)
301310
assert rel.execute().fetchall() == csv_rel.execute().fetchall()
302311

303312
@pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()])
304-
def test_to_csv_use_tmp_file(self, pandas):
305-
temp_file_name = os.path.join(tempfile.mkdtemp(), next(tempfile._get_candidate_names()))
313+
def test_to_csv_use_tmp_file(self, pandas, tmp_path, default_con):
314+
temp_file_name = str(tmp_path / "test.csv")
315+
306316
df = pandas.DataFrame(
307317
{
308318
"c_category_1": ['a', 'a', 'b', 'b'],
@@ -313,8 +323,8 @@ def test_to_csv_use_tmp_file(self, pandas):
313323
"c_string": ["a", "b,c", "e", "f"],
314324
}
315325
)
316-
rel = duckdb.from_df(df)
326+
rel = default_con.from_df(df)
317327
rel.to_csv(temp_file_name, header=True) # csv to be overwritten
318328
rel.to_csv(temp_file_name, header=True, use_tmp_file=True)
319-
csv_rel = duckdb.read_csv(temp_file_name, header=True)
329+
csv_rel = default_con.read_csv(temp_file_name, header=True)
320330
assert rel.execute().fetchall() == csv_rel.execute().fetchall()

0 commit comments

Comments
 (0)