Skip to content

Commit daedf80

Browse files
committed
fix: apparently MIME magic typing doesn't work on Windows OS
1 parent 515ea1b commit daedf80

5 files changed

Lines changed: 80 additions & 53 deletions

File tree

pysus/api/client.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
import enum
88
from collections.abc import Callable
9-
from datetime import datetime
9+
from datetime import UTC, datetime
1010
from pathlib import Path
1111
from typing import TYPE_CHECKING, Literal
1212

@@ -16,6 +16,7 @@
1616
from pysus import CACHEPATH
1717
from sqlalchemy import DateTime, Enum, Integer, String, create_engine
1818
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, sessionmaker
19+
from sqlalchemy.pool import NullPool
1920

2021
from .dadosgov import DadosGovClient
2122
from .ducklake.client import DuckLake
@@ -61,7 +62,7 @@ class LocalFileState(Base):
6162
sha256: Mapped[str | None] = mapped_column(String, nullable=True)
6263
last_synced: Mapped[datetime] = mapped_column(
6364
DateTime,
64-
default=datetime.utcnow,
65+
default=lambda: datetime.now(UTC).replace(tzinfo=None),
6566
)
6667

6768

@@ -85,7 +86,10 @@ def __init__(self, db_path: Path = CACHEPATH / "config.db"):
8586
db_path.parent.mkdir(parents=True, exist_ok=True)
8687

8788
self.cachepath = db_path.parent
88-
self.engine = create_engine(f"duckdb:///{db_path}")
89+
self.engine = create_engine(
90+
f"duckdb:///{db_path.resolve().as_posix()}",
91+
poolclass=NullPool,
92+
)
8993
Base.metadata.create_all(self.engine)
9094
self.Session = sessionmaker(bind=self.engine)
9195

@@ -239,7 +243,7 @@ async def _update_state(
239243
session.add(record)
240244

241245
record.status = status
242-
record.last_synced = datetime.utcnow()
246+
record.last_synced = datetime.now(UTC).replace(tzinfo=None)
243247
session.commit()
244248

245249
async def download(

pysus/api/extensions.py

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import csv
55
import gzip
66
import shutil
7+
import sys
78
import tarfile
89
import zipfile
910
from collections.abc import AsyncGenerator, Callable
@@ -868,8 +869,7 @@ class ExtensionFactory:
868869
".json": JSON,
869870
}
870871

871-
_magic_available: bool = True
872-
_magic_lock: asyncio.Lock = asyncio.Lock()
872+
_magic_available: bool = sys.platform != "win32"
873873

874874
@classmethod
875875
async def _identify(cls, path: Path) -> type[BaseLocalFile] | None:
@@ -881,16 +881,15 @@ async def _identify(cls, path: Path) -> type[BaseLocalFile] | None:
881881
except (ImportError, OSError):
882882
cls._magic_available = False
883883
return None
884-
async with cls._magic_lock:
885-
try:
886-
mime = await to_thread.run_sync(
887-
magic.from_file,
888-
str(path),
889-
True,
890-
)
891-
return cls._mime.get(mime)
892-
except (magic.MagicException, OSError):
893-
return None
884+
try:
885+
mime = await to_thread.run_sync(
886+
magic.from_file,
887+
str(path),
888+
True,
889+
)
890+
return cls._mime.get(mime)
891+
except (magic.MagicException, OSError):
892+
return None
894893

895894
@classmethod
896895
async def get_file_class(cls, path: Path) -> type[BaseLocalFile]:

pysus/tests/api/ftp/test_models.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -128,15 +128,15 @@ def formatter(self, f):
128128

129129

130130
@pytest.mark.asyncio
131-
async def test_file_download_calls_client(mock_client, mock_dataset):
131+
async def test_file_download_calls_client(mock_client, mock_dataset, tmp_path):
132132
file = File(
133133
path="/root/test.dbc",
134134
_info={"path": "/root/test.dbc", "name": "test.dbc"},
135135
type="file",
136136
dataset=mock_dataset,
137137
)
138138

139-
dest = Path("/tmp/test.dbc")
139+
dest = Path(tmp_path / "test.dbc")
140140
await file._download(output=dest)
141141

142142
mock_client._download_file.assert_called_once_with(file, dest, None)

pysus/tests/api/test_client.py

Lines changed: 21 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -77,11 +77,13 @@ def test_download_status_values(self):
7777

7878
class TestLocalFileState:
7979
@pytest.mark.asyncio
80-
async def test_update_state_creates_record(self, test_db_path):
80+
async def test_update_state_creates_record(self, test_db_path, tmp_path):
8181
client = PySUS(db_path=test_db_path)
8282

83+
local = pathlib.Path(tmp_path / "test.dbc")
84+
8385
await client._update_state(
84-
local_path=pathlib.Path("/tmp/test.dbc"),
86+
local_path=local,
8587
remote_path="/remote/test.dbc",
8688
client_name="ftp",
8789
status=DownloadStatus.COMPLETED,
@@ -93,9 +95,7 @@ async def test_update_state_creates_record(self, test_db_path):
9395

9496
with client.Session() as session:
9597
record = (
96-
session.query(LocalFileState)
97-
.filter_by(path="/tmp/test.dbc")
98-
.first()
98+
session.query(LocalFileState).filter_by(path=str(local)).first()
9999
)
100100
assert record is not None
101101
assert record.remote_path == "/remote/test.dbc"
@@ -109,34 +109,36 @@ async def test_update_state_creates_record(self, test_db_path):
109109
await client.__aexit__(None, None, None)
110110

111111
@pytest.mark.asyncio
112-
async def test_delete_record_removes_entry(self, test_db_path):
112+
async def test_delete_record_removes_entry(self, test_db_path, tmp_path):
113113
client = PySUS(db_path=test_db_path)
114114

115+
local = pathlib.Path(tmp_path / "test.dbc")
116+
115117
await client._update_state(
116-
local_path=pathlib.Path("/tmp/test.dbc"),
118+
local_path=local,
117119
remote_path="/remote/test.dbc",
118120
client_name="ftp",
119121
status=DownloadStatus.COMPLETED,
120122
)
121123

122-
await client._delete_record("/tmp/test.dbc")
124+
await client._delete_record(str(local))
123125

124126
with client.Session() as session:
125127
record = (
126-
session.query(LocalFileState)
127-
.filter_by(path="/tmp/test.dbc")
128-
.first()
128+
session.query(LocalFileState).filter_by(path=str(local)).first()
129129
)
130130
assert record is None
131131

132132
await client.__aexit__(None, None, None)
133133

134134
@pytest.mark.asyncio
135-
async def test_get_local_file_finds_existing(self, test_db_path):
135+
async def test_get_local_file_finds_existing(self, test_db_path, tmp_path):
136136
client = PySUS(db_path=test_db_path)
137137

138+
local = pathlib.Path(tmp_path / "test.dbc")
139+
138140
await client._update_state(
139-
local_path=pathlib.Path("/tmp/test.dbc"),
141+
local_path=local,
140142
remote_path="/remote/test.dbc",
141143
client_name="ftp",
142144
status=DownloadStatus.COMPLETED,
@@ -158,17 +160,20 @@ async def test_get_local_file_finds_existing(self, test_db_path):
158160

159161
class TestGetCompletedRemotePaths:
160162
@pytest.mark.asyncio
161-
async def test_get_completed_remote_paths(self, test_db_path):
163+
async def test_get_completed_remote_paths(self, test_db_path, tmp_path):
162164
client = PySUS(db_path=test_db_path)
163165

166+
local1 = pathlib.Path(tmp_path / "test1.dbc")
167+
local2 = pathlib.Path(tmp_path / "test2.dbc")
168+
164169
await client._update_state(
165-
local_path=pathlib.Path("/tmp/test1.dbc"),
170+
local_path=local1,
166171
remote_path="/remote/test1.dbc",
167172
client_name="ftp",
168173
status=DownloadStatus.COMPLETED,
169174
)
170175
await client._update_state(
171-
local_path=pathlib.Path("/tmp/test2.dbc"),
176+
local_path=local2,
172177
remote_path="/remote/test2.dbc",
173178
client_name="ftp",
174179
status=DownloadStatus.PENDING,

pysus/tests/api/test_databases.py

Lines changed: 38 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -303,14 +303,27 @@ def test_fetch_data_no_files(self):
303303

304304

305305
class TestListFiles:
306+
def _mock_asyncio_run(self, return_value):
307+
import asyncio
308+
309+
def _run(coro):
310+
if asyncio.iscoroutine(coro):
311+
coro.close()
312+
return return_value
313+
314+
return _run
315+
306316
def test_list_files_returns_dataframe(self):
307317
import pandas as pd
308318

309-
with patch("pysus.api._impl.databases.asyncio.run") as mock_run:
310-
mock_run.return_value = pd.DataFrame(
311-
{"name": ["test.parquet"], "path": ["/test.parquet"]}
312-
)
319+
ret = pd.DataFrame(
320+
{"name": ["test.parquet"], "path": ["/test.parquet"]}
321+
)
313322

323+
with patch(
324+
"pysus.api._impl.databases.asyncio.run",
325+
side_effect=self._mock_asyncio_run(ret),
326+
):
314327
from pysus.api._impl.databases import list_files
315328

316329
result = list_files(dataset="SINAN")
@@ -321,19 +334,22 @@ def test_list_files_returns_dataframe(self):
321334
def test_list_files_with_filters(self):
322335
import pandas as pd
323336

324-
with patch("pysus.api._impl.databases.asyncio.run") as mock_run:
325-
mock_run.return_value = pd.DataFrame(
326-
{
327-
"name": ["test1.parquet", "test2.parquet"],
328-
"path": ["/test1.parquet", "/test2.parquet"],
329-
"dataset": ["sinan", "sinan"],
330-
"year": [2024, 2023],
331-
"month": [1, 2],
332-
"state": ["SP", "RJ"],
333-
"modify": ["2024-01-01", "2024-01-02"],
334-
}
335-
)
336-
337+
ret = pd.DataFrame(
338+
{
339+
"name": ["test1.parquet", "test2.parquet"],
340+
"path": ["/test1.parquet", "/test2.parquet"],
341+
"dataset": ["sinan", "sinan"],
342+
"year": [2024, 2023],
343+
"month": [1, 2],
344+
"state": ["SP", "RJ"],
345+
"modify": ["2024-01-01", "2024-01-02"],
346+
}
347+
)
348+
349+
with patch(
350+
"pysus.api._impl.databases.asyncio.run",
351+
side_effect=self._mock_asyncio_run(ret),
352+
):
337353
from pysus.api._impl.databases import list_files
338354

339355
result = list_files(
@@ -357,9 +373,12 @@ def test_list_files_with_filters(self):
357373
def test_list_files_empty_result(self):
358374
import pandas as pd
359375

360-
with patch("pysus.api._impl.databases.asyncio.run") as mock_run:
361-
mock_run.return_value = pd.DataFrame()
376+
ret = pd.DataFrame()
362377

378+
with patch(
379+
"pysus.api._impl.databases.asyncio.run",
380+
side_effect=self._mock_asyncio_run(ret),
381+
):
363382
from pysus.api._impl.databases import list_files
364383

365384
result = list_files(dataset="SINAN")

0 commit comments

Comments
 (0)